Mercurial > repos > cstrittmatter > ss2v110
annotate build/scripts-3.6/SeqSero2_package.py @ 14:047956dacae4 draft default tip
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
author | cstrittmatter |
---|---|
date | Thu, 21 May 2020 22:01:02 -0400 |
parents | e6437d423693 |
children |
rev | line source |
---|---|
10
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1 #!/Users/charles.strittmatter/miniconda3/bin/python |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
2 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
3 import sys |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
4 import time |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
5 import random |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
6 import os |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
7 import subprocess |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
8 import gzip |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
9 import io |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
10 import pickle |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
11 import argparse |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
12 import itertools |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
13 from distutils.version import LooseVersion |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
14 from distutils.spawn import find_executable |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
15 sys.path.insert(1,sys.path[0]+'/..') |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
16 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
17 try: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
18 from .version import SeqSero2_version |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
19 except Exception: #ImportError |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
20 from version import SeqSero2_version |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
21 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
22 ### SeqSero Kmer |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
def parse_args():
    """Build the SeqSero2 command-line parser and parse the process arguments.

    The options are declared in a table and registered in order, so the
    generated help lists them in the same sequence as the table.

    Returns:
        argparse.Namespace: parsed options with attributes ``i``, ``t``,
        ``b``, ``p``, ``m``, ``n``, ``d``, ``c``, ``s`` and ``check``.
        ``-v/--version`` prints the program name followed by
        ``SeqSero2_version`` and exits.
    """
    usage_text = (
        'SeqSero2_package.py -t <data_type> -m <mode> -i <input_data> [-d <output_directory>] [-p <number of threads>] [-b <BWA_algorithm>]\n\n'
        'Developper: Shaokang Zhang (zskzsk@uga.edu), Hendrik C Den-Bakker (Hendrik.DenBakker@uga.edu) and Xiangyu Deng (xdeng@uga.edu)\n\n'
        'Contact email:seqsero@gmail.com\n\n'
        'Version: v1.1.1'
    )
    # (flags, keyword arguments) for every option, in help-display order.
    option_specs = [
        # type=os.path.abspath turns every input path into an absolute path.
        (("-i",), dict(nargs="+", help="<string>: path/to/input_data", type=os.path.abspath)),
        (("-t",), dict(choices=['1', '2', '3', '4', '5', '6'], help="<int>: '1' for interleaved paired-end reads, '2' for separated paired-end reads, '3' for single reads, '4' for genome assembly, '5' for nanopore fasta, '6' for nanopore fastq")),
        (("-b",), dict(choices=['sam', 'mem'], default="mem", help="<string>: algorithms for bwa mapping for allele mode; 'mem' for mem, 'sam' for samse/sampe; default=mem; optional; for now we only optimized for default 'mem' mode")),
        # NOTE: the thread count is kept as a string (no ``type=int``).
        (("-p",), dict(default="1", help="<int>: number of threads for allele mode, if p >4, only 4 threads will be used for assembly since the amount of extracted reads is small, default=1")),
        (("-m",), dict(choices=['k', 'a'], default="a", help="<string>: which workflow to apply, 'a'(raw reads allele micro-assembly), 'k'(raw reads and genome assembly k-mer), default=a")),
        (("-n",), dict(help="<string>: optional, to specify a sample name in the report output")),
        (("-d",), dict(help="<string>: optional, to specify an output directory name, if not set, the output directory would be 'SeqSero_result_'+time stamp+one random number")),
        (("-c",), dict(action="store_true", help="<flag>: if '-c' was flagged, SeqSero2 will only output serotype prediction without the directory containing log files")),
        (("-s",), dict(action="store_true", help="<flag>: if '-s' was flagged, SeqSero2 will not output header in SeqSero_result.tsv")),
        (("--check",), dict(action="store_true", help="<flag>: use '--check' flag to check the required dependencies")),
        (('-v', '--version'), dict(action='version', version='%(prog)s ' + SeqSero2_version)),
    ]
    cli = argparse.ArgumentParser(usage=usage_text)
    for flags, settings in option_specs:
        cli.add_argument(*flags, **settings)
    return cli.parse_args()
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
38 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
### check paths of dependencies
# Runs at import time: the command line is parsed once just to read --check.
check_dependencies = parse_args().check
dependencies = ['bwa','samtools','blastn','fastq-dump','spades.py','bedtools','SalmID.py']
if check_dependencies:
    for item in dependencies:
        ext_path = find_executable(item)
        if ext_path is None:
            # Missing tools are reported but do not stop the scan.
            print ("ERROR: can not find "+item+" in PATH")
            continue
        print ("Using "+item+" - "+ext_path)
    # NOTE(review): the original indentation was not recoverable from this
    # view; sys.exit() is assumed to run once, after every dependency has
    # been reported -- confirm against the repository copy.
    sys.exit()
### end of --check
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
51 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
# Complement of each upper-case nucleotide code (A/C/G/T, N and the IUPAC
# ambiguity codes). Built once at import time: reverse_complement() is called
# for every k-mer, so rebuilding this table per call was wasted work.
_COMPLEMENT_BASES = {
    'A': 'T',
    'C': 'G',
    'G': 'C',
    'T': 'A',
    'N': 'N',
    'M': 'K',
    'R': 'Y',
    'W': 'W',
    'S': 'S',
    'Y': 'R',
    'K': 'M',
    'V': 'B',
    'H': 'D',
    'D': 'H',
    'B': 'V',
}


def reverse_complement(sequence):
    """Return the reverse complement of an upper-case nucleotide string.

    Args:
        sequence: string made only of the upper-case codes present in
            ``_COMPLEMENT_BASES``.

    Returns:
        str: the complemented bases in reverse order ("" for empty input).

    Raises:
        KeyError: if ``sequence`` contains any other character, including
            lower-case bases, whitespace or line terminators.
    """
    return "".join([_COMPLEMENT_BASES[base] for base in reversed(sequence)])
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
71 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
72 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
def createKmerDict_reads(list_of_strings, kmer):
    """Count every k-mer and its reverse complement in a list of sequences.

    Args:
        list_of_strings: iterable of sequence strings; surrounding
            whitespace and line terminators (``\\n`` as well as ``\\r\\n``)
            are removed before k-mers are cut.
        kmer: k-mer length (int).

    Returns:
        dict: upper-case k-mer -> count. Each window adds one to its own
        k-mer and one to the reverse complement, so a k-mer that equals its
        own reverse complement is incremented twice per occurrence.
        Sequences shorter than ``kmer`` contribute nothing.

    Raises:
        KeyError: propagated from ``reverse_complement`` when a k-mer
            contains a character it does not know.
    """
    kmer_table = {}
    for string in list_of_strings:
        # Upper-case once per sequence instead of once per k-mer lookup.
        # strip() (rather than strip('\n')) also drops a trailing '\r', which
        # would otherwise end up inside the last k-mer and raise KeyError.
        sequence = string.strip().upper()
        for i in range(len(sequence) - kmer + 1):
            new_mer = sequence[i:i + kmer]
            for mer in (new_mer, reverse_complement(new_mer)):
                kmer_table[mer] = kmer_table.get(mer, 0) + 1
    return kmer_table
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
89 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
90 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
def multifasta_dict(multifasta):
    """Parse a multi-FASTA file into a ``{header: sequence}`` dictionary.

    The file is read in a single pass and closed before returning.

    Args:
        multifasta: path of the FASTA file.

    Returns:
        dict: header text (without the leading ``>``) mapped to the
        concatenation of its sequence lines, in file order. Blank lines and
        any lines before the first header are ignored. A header that has no
        sequence lines gets no entry. When the same header occurs more than
        once, the sequence lines of all its records are concatenated.
    """
    sequence_parts = {}
    current_name = None
    with open(multifasta, 'r') as handle:
        for raw_line in handle:
            line = raw_line.strip()
            if not line:
                continue
            if line[0] == '>':
                current_name = line[1:]
            elif current_name is not None:
                # Collect pieces and join once at the end instead of
                # repeatedly growing a string.
                sequence_parts.setdefault(current_name, []).append(line)
    return {name: "".join(parts) for name, parts in sequence_parts.items()}
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
108 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
109 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
def multifasta_single_string(multifasta):
    """Concatenate every sequence line of a FASTA file into one string.

    Args:
        multifasta: path of the FASTA file.

    Returns:
        str: all non-blank lines that do not start with ``>``, stripped of
        surrounding whitespace and joined in file order with no separator
        (record boundaries are not preserved).
    """
    # The context manager closes the handle; the original left it open.
    with open(multifasta, 'r') as handle:
        stripped_lines = (line.strip() for line in handle)
        return ''.join(
            [text for text in stripped_lines if text and text[0] != '>']
        )
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
116 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
117 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
def chunk_a_long_sequence(long_sequence, chunk_size=60):
    """Split a sequence into consecutive pieces of ``chunk_size`` characters.

    Args:
        long_sequence: the string to split.
        chunk_size: length of each full chunk (default 60).

    Returns:
        list: the full chunks followed by one final remainder chunk. The
        remainder is always appended, so it is ``""`` when the length is an
        exact multiple of ``chunk_size`` and the list is never empty.

    Raises:
        ZeroDivisionError: if ``chunk_size`` is 0.
    """
    # The number of full chunks must follow chunk_size; it used to be
    # hard-coded to 60, which mis-split the input for any other size.
    steps = len(long_sequence) // chunk_size
    chunk_list = [
        long_sequence[i * chunk_size:(i + 1) * chunk_size]
        for i in range(steps)
    ]
    chunk_list.append(long_sequence[steps * chunk_size:])
    return chunk_list
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
125 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
126 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
def target_multifasta_kmerizer(multifasta, k, kmerDict):
    """Collect k-mers from the assembly regions that surround seed hits.

    The FASTA file is flattened into one string and cut into 60-base chunks.
    For each chunk, the ``k`` bases taken from its middle are looked up in
    ``kmerDict``; on a hit, a window of neighbouring chunks is joined and all
    of its k-mers (and their reverse complements) are added to the result.
    Chunks that fall inside the most recent window are not tested again.

    Args:
        multifasta: path of the FASTA file.
        k: k-mer length (int).
        kmerDict: container of seed k-mers; only membership is tested.

    Returns:
        set: the k-mers found in all hit windows; empty when the file
        contains no sequence or nothing matches.
    """
    forward_length = 300  # bases of context used to size the window around a hit
    chunk_size = 60  # the flattened sequence is scanned in pieces of this size
    # A set avoids accumulating a huge list of duplicate k-mers.
    target_mers = set()
    long_single_string = multifasta_single_string(multifasta)
    multifasta_list = chunk_a_long_sequence(long_single_string, chunk_size)
    if not multifasta_list or not multifasta_list[0]:
        # No sequence at all: a zero unit length would divide by zero below.
        return target_mers
    unit_length = len(multifasta_list[0])
    forward_lines = int(forward_length / unit_length) + 1
    # NOTE(review): the original declared reverse_length = 2200 ("put
    # backward 2200 bases") but never used it; the trailing side of the
    # window is sized from forward_length as well. Kept as-is because
    # changing it alters which k-mers are extracted -- confirm the intent.
    reverse_lines = int(forward_length / unit_length) + 1
    last_index = len(multifasta_list) - 1
    start_num = 0
    end_num = 0
    for i, line in enumerate(multifasta_list):
        if start_num <= i < end_num:
            # Already covered by the previous window: avoid repeated work.
            continue
        start = int((len(line) - k) // 2)
        s1 = line[start:k + start]
        if s1 not in kmerDict:  # the middle of the chunk decides a hit
            continue
        start_num = max(i - forward_lines, 0)
        # NOTE(review): end_num is used as an exclusive slice bound, so when
        # it is clamped to last_index the final chunk is left out of the
        # window. Preserved from the original -- confirm whether intended.
        end_num = min(i + reverse_lines, last_index)
        target_line = "".join(
            [x.strip() for x in multifasta_list[start_num:end_num]]
        )
        target_mers.update(createKmerDict_reads([target_line], k))
    return target_mers
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
163 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
164 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
def target_read_kmerizer(file, k, kmerDict):
    """Collect k-mers from the FASTQ reads whose middle k-mer is a seed hit.

    Only the second line of every four-line record (the sequence) is
    examined. The ``k`` characters taken from the middle of that line are
    looked up in ``kmerDict``; on a hit, every k-mer of the read (and its
    reverse complement) is added to the result. Reading stops once the
    accumulated length of the hit lines reaches 4,000,000 characters.

    Args:
        file: path of a FASTQ file; a name ending in ``.gz`` is read through
            gzip, anything else as plain text.
        k: k-mer length (int).
        kmerDict: container of seed k-mers; only membership is tested.

    Returns:
        set: the k-mers of all matching reads (empty if none match).
    """
    coverage_limit = 4000000
    total_coverage = 0
    # A set avoids accumulating a huge list of duplicate k-mers.
    target_mers = set()
    is_gzipped = file.endswith(".gz")
    # Gzipped input is read as bytes and only sequence lines are decoded;
    # plain input is read in text mode, as before.
    handle = gzip.open(file, "rb") if is_gzipped else open(file, "r")
    # Stream the file and close it on exit; the original never closed either
    # handle and loaded plain files entirely with readlines().
    with handle:
        for line_number, line in enumerate(handle, start=1):
            if line_number % 4 != 2:
                continue
            if is_gzipped:
                line = line.decode()
            # The line still carries its terminator here, so both the middle
            # offset and the coverage total include it (as in the original).
            start = int((len(line) - k) // 2)
            s1 = line[start:k + start]
            if s1 in kmerDict:  # the middle of the read decides a hit
                total_coverage += len(line)
                target_mers.update(createKmerDict_reads([line], k))
                if total_coverage >= coverage_limit:
                    break
    return target_mers
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
192 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
193 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
def minion_fasta_kmerizer(file, k, kmerDict):
    """Collect the k-mers of a FASTA file that hit ``kmerDict``.

    Only every second line of ``file`` (lines 2, 4, 6, ...) is treated as
    sequence, i.e. the input is read as strictly alternating header/sequence
    lines.  Each sequence line is stripped, upper-cased and broken into
    ``(kmer, rc_kmer)`` pairs by ``kmers``.  Whenever either member of a pair
    is present in ``kmerDict``, both members are kept.

    Args:
        file: Path of the FASTA file to read.
        k: K-mer length passed through to ``kmers``.
        kmerDict: Container supporting ``in`` that holds the reference k-mers.

    Returns:
        A ``set`` with every retained k-mer (both orientations).
    """
    target_mers = set()
    # The context manager guarantees the handle is closed, even on error.
    with open(file) as handle:
        for line_number, line in enumerate(handle, start=1):
            if line_number % 2 != 0:
                continue  # odd lines are headers in the 2-line layout
            for kmer, rc_kmer in kmers(line.strip().upper(), k):
                if kmer in kmerDict or rc_kmer in kmerDict:
                    # Only membership is ever returned, so a set is enough;
                    # no per-k-mer counting is needed.
                    target_mers.add(kmer)
                    target_mers.add(rc_kmer)
    return target_mers
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
213 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
214 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
def minion_fastq_kmerizer(file, k, kmerDict):
    """Collect the k-mers of a FASTQ file that hit ``kmerDict``.

    Only lines whose 1-based number satisfies ``n % 4 == 2`` are treated as
    sequence, i.e. the second line of every four-line record.  Each sequence
    line is stripped, upper-cased and broken into ``(kmer, rc_kmer)`` pairs
    by ``kmers``.  Whenever either member of a pair is present in
    ``kmerDict``, both members are kept.

    Args:
        file: Path of the FASTQ file to read.
        k: K-mer length passed through to ``kmers``.
        kmerDict: Container supporting ``in`` that holds the reference k-mers.

    Returns:
        A ``set`` with every retained k-mer (both orientations).
    """
    target_mers = set()
    # The context manager guarantees the handle is closed, even on error.
    with open(file) as handle:
        for line_number, line in enumerate(handle, start=1):
            if line_number % 4 != 2:
                continue  # header, separator and quality lines are ignored
            for kmer, rc_kmer in kmers(line.strip().upper(), k):
                if kmer in kmerDict or rc_kmer in kmerDict:
                    # Only membership is ever returned, so a set is enough;
                    # no per-k-mer counting is needed.
                    target_mers.add(kmer)
                    target_mers.add(rc_kmer)
    return target_mers
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
234 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
235 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
def multifasta_single_string2(multifasta):
    """Concatenate every sequence line of a FASTA file into one string.

    Lines whose first non-blank character is ``>`` are headers and are
    skipped.  All other lines are stripped of surrounding whitespace and
    joined in file order, with no separator between records.

    Args:
        multifasta: Path of the (multi-)FASTA file to read.

    Returns:
        The concatenated sequence text; ``''`` if the file has no sequence
        lines.
    """
    pieces = []
    with open(multifasta, 'r') as f:
        for line in f:
            stripped = line.strip()
            # Blank lines (e.g. a trailing newline at end of file) previously
            # raised IndexError on ``stripped[0]``; they carry no sequence.
            if not stripped or stripped.startswith('>'):
                continue
            pieces.append(stripped)
    # A single join avoids repeated string reallocation on large genomes.
    return ''.join(pieces)
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
245 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
246 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
def kmers(seq, k):
    """Yield ``(kmer, rc_slice)`` pairs for every k-length window of ``seq``.

    ``kmer`` is ``seq[start:start + k]``.  ``rc_slice`` is the window of
    ``reverse_complement(seq)`` that lies the same distance from the end as
    ``kmer`` lies from the start.
    NOTE(review): this is the reverse complement of ``kmer`` provided
    ``reverse_complement`` returns a string of the same length as ``seq``
    -- confirm against that helper.

    The previous implementation started at offset 1 because the negative
    slice ``rev_comp[-(start + k):-start]`` collapses to an empty string at
    ``start == 0``; as a result the first k-mer of every sequence was never
    produced.  Using non-negative slice bounds lets offset 0 be included,
    while yielding exactly the same pairs as before for offsets >= 1.

    Args:
        seq: Sequence string to window over.
        k: Window (k-mer) length.

    Yields:
        Tuples ``(kmer, rc_slice)``; nothing if ``k`` exceeds ``len(seq)``.
    """
    rev_comp = reverse_complement(seq)
    seq_len = len(seq)
    for start in range(seq_len - k + 1):
        stop = seq_len - start  # exclusive end of the mirrored window
        yield seq[start:start + k], rev_comp[stop - k:stop]
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
251 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
252 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
def multifasta_to_kmers_dict(multifasta, k_size):  # used to create database kmer set
    """Build a per-record k-mer set for a multi-FASTA file.

    The file is loaded with ``multifasta_dict``; for every key of the result
    the associated sequence is passed (wrapped in a one-element list) to
    ``createKmerDict_reads`` together with ``k_size``, and whatever that call
    yields on iteration is collected into a ``set``.

    Args:
        multifasta: Passed unchanged to ``multifasta_dict``.
        k_size: K-mer length forwarded to ``createKmerDict_reads``.

    Returns:
        A dict mapping each record key to its set of k-mers.
    """
    sequences = multifasta_dict(multifasta)
    return {
        header: set(createKmerDict_reads([sequences[header]], k_size))
        for header in sequences
    }
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
260 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
261 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
def Combine(b, c):
    """Expand an antigen formula with optional factors into all variants.

    ``c`` holds the factors that are always present and ``b`` the optional
    ones (for example ``f,[g],t`` gives ``c = ['f', 't']``, ``b = ['g']``
    and the result ``['f,t', 'f,g,t']``).

    Args:
        b: List of optional factors.
        c: List of mandatory factors.

    Returns:
        A list of comma-joined formulas.  The first entry is ``c`` joined in
        its given order (not sorted).  It is followed by one entry per
        non-empty subset of ``b`` -- smallest subsets first, in
        ``itertools.combinations`` order -- each merged with ``c`` and with
        its factors sorted alphabetically.
    """
    required = list(c)
    fliC_combinations = [",".join(required)]
    for size in range(1, len(b) + 1):
        for chosen in itertools.combinations(b, size):
            # Join then split so that any element that itself contains
            # commas is broken into individual factors before sorting.
            factors = ",".join(required + list(chosen)).split(",")
            fliC_combinations.append(",".join(sorted(factors)))
    return fliC_combinations
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
280 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
281 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
282 def seqsero_from_formula_to_serotypes(Otype, fliC, fljB, special_gene_list,subspecies): |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
283 #like test_output_06012017.txt |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
284 #can add more varialbles like sdf-type, sub-species-type in future (we can conclude it into a special-gene-list) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
285 from Initial_Conditions import phase1,phase2,phaseO,sero,subs,remove_list,rename_dict |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
286 rename_dict_not_anymore=[rename_dict[x] for x in rename_dict] |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
287 rename_dict_all=rename_dict_not_anymore+list(rename_dict) #used for decide whether to |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
288 seronames = [] |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
289 seronames_none_subspecies=[] |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
290 for i in range(len(phase1)): |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
291 fliC_combine = [] |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
292 fljB_combine = [] |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
293 if phaseO[i] == Otype: # no VII in KW, but it's there |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
294 ### for fliC, detect every possible combinations to avoid the effect of "[" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
295 if phase1[i].count("[") == 0: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
296 fliC_combine.append(phase1[i]) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
297 elif phase1[i].count("[") >= 1: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
298 c = [] |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
299 b = [] |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
300 if phase1[i][0] == "[" and phase1[i][-1] == "]" and phase1[i].count( |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
301 "[") == 1: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
302 content = phase1[i].replace("[", "").replace("]", "") |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
303 fliC_combine.append(content) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
304 fliC_combine.append("-") |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
305 else: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
306 for x in phase1[i].split(","): |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
307 if "[" in x: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
308 b.append(x.replace("[", "").replace("]", "")) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
309 else: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
310 c.append(x) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
311 fliC_combine = Combine( |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
312 b, c |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
313 ) #Combine will offer every possible combinations of the formula, like f,[g],t: f,t f,g,t |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
314 ### end of fliC "[" detect |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
315 ### for fljB, detect every possible combinations to avoid the effect of "[" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
316 if phase2[i].count("[") == 0: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
317 fljB_combine.append(phase2[i]) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
318 elif phase2[i].count("[") >= 1: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
319 d = [] |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
320 e = [] |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
321 if phase2[i][0] == "[" and phase2[i][-1] == "]" and phase2[i].count( |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
322 "[") == 1: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
323 content = phase2[i].replace("[", "").replace("]", "") |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
324 fljB_combine.append(content) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
325 fljB_combine.append("-") |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
326 else: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
327 for x in phase2[i].split(","): |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
328 if "[" in x: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
329 d.append(x.replace("[", "").replace("]", "")) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
330 else: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
331 e.append(x) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
332 fljB_combine = Combine(d, e) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
333 ### end of fljB "[" detect |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
334 new_fliC = fliC.split( |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
335 "," |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
336 ) #because some antigen like r,[i] not follow alphabetical order, so use this one to judge and can avoid missings |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
337 new_fliC.sort() |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
338 new_fliC = ",".join(new_fliC) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
339 new_fljB = fljB.split(",") |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
340 new_fljB.sort() |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
341 new_fljB = ",".join(new_fljB) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
342 if (new_fliC in fliC_combine |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
343 or fliC in fliC_combine) and (new_fljB in fljB_combine |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
344 or fljB in fljB_combine): |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
345 ######start, remove_list,rename_dict, added on 11/11/2018 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
346 if sero[i] not in remove_list: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
347 temp_sero=sero[i] |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
348 if temp_sero in rename_dict: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
349 temp_sero=rename_dict[temp_sero] #rename if in the rename list |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
350 if temp_sero not in seronames:#the new sero may already included, if yes, then not consider |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
351 if subs[i] == subspecies: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
352 seronames.append(temp_sero) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
353 seronames_none_subspecies.append(temp_sero) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
354 else: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
355 pass |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
356 else: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
357 pass |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
358 ######end, added on 11/11/2018 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
359 #analyze seronames |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
360 subspecies_pointer="" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
361 if len(seronames) == 0 and len(seronames_none_subspecies)!=0: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
362 # ed_SL_12182019: modified to fix the subspecies output problem |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
363 #seronames=seronames_none_subspecies |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
364 seronames=["N/A"] |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
365 #subspecies_pointer="1" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
366 subspecies_pointer="0" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
367 if len(seronames) == 0: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
368 seronames = [ |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
369 "N/A (The predicted antigenic profile does not exist in the White-Kauffmann-Le Minor scheme)" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
370 ] |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
371 star = "" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
372 star_line = "" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
373 if len(seronames) > 1: #there are two possible predictions for serotypes |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
374 star = "*" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
375 #changed 04072019 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
376 #star_line = "The predicted serotypes share the same general formula:\t" + Otype + ":" + fliC + ":" + fljB + "\n" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
377 if subspecies_pointer=="1" and len(seronames_none_subspecies)!=0: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
378 star="*" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
379 star_line="The predicted O and H antigens correspond to serotype '"+(" or ").join(seronames)+"' in the Kauffmann-White scheme. The predicted subspecies by SalmID (github.com/hcdenbakker/SalmID) may not be consistent with subspecies designation in the Kauffmann-White scheme. " + star_line |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
380 #star_line="The formula with this subspieces prediction can't get a serotype in KW manual, and the serotyping prediction was made without considering it."+star_line |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
381 if Otype=="": |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
382 Otype="-" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
383 predict_form = Otype + ":" + fliC + ":" + fljB |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
384 predict_sero = (" or ").join(seronames) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
385 ###special test for Enteritidis |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
386 if predict_form == "9:g,m:-": |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
387 sdf = "-" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
388 for x in special_gene_list: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
389 if x.startswith("sdf"): |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
390 sdf = "+" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
391 #star_line="Detected sdf gene, a marker to differentiate Gallinarum and Enteritidis" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
392 star_line="sdf gene detected. " |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
393 #predict_form = predict_form + " Sdf prediction:" + sdf |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
394 predict_form = predict_form #changed 04072019 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
395 if sdf == "-": |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
396 star = "*" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
397 #star_line="Didn't detected sdf gene, a marker to differentiate Gallinarum and Enteritidis" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
398 star_line="sdf gene not detected. " |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
399 #changed in 04072019, for new output |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
400 #star_line = "Additional characterization is necessary to assign a serotype to this strain. Commonly circulating strains of serotype Enteritidis are sdf+, although sdf- strains of serotype Enteritidis are known to exist. Serotype Gallinarum is typically sdf- but should be quite rare. Sdf- strains of serotype Enteritidis and serotype Gallinarum can be differentiated by phenotypic profile or genetic criteria.\n" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
401 #predict_sero = "Gallinarum/Enteritidis" #04132019, for new output requirement |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
402 predict_sero = "Gallinarum or Enteritidis" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
403 ###end of special test for Enteritidis |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
404 elif predict_form == "4:i:-": |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
405 predict_sero = "I 4,[5],12:i:-" # change serotype name |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
406 elif predict_form == "4:r:-": |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
407 predict_sero = "N/A (4:r:-)" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
408 elif predict_form == "4:b:-": |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
409 predict_sero = "N/A (4:b:-)" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
410 #elif predict_form == "8:e,h:1,2": #removed after official merge of newport and bardo |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
411 #predict_sero = "Newport" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
412 #star = "*" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
413 #star_line = "Serotype Bardo shares the same antigenic profile with Newport, but Bardo is exceedingly rare." |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
414 claim = "The serotype(s) is/are the only serotype(s) with the indicated antigenic profile currently recognized in the Kauffmann White Scheme. New serotypes can emerge and the possibility exists that this antigenic profile may emerge in a different subspecies. Identification of strains to the subspecies level should accompany serotype determination; the same antigenic profile in different subspecies is considered different serotypes.\n" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
415 if "N/A" in predict_sero: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
416 claim = "" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
417 #special test for Typhimurium |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
418 if "Typhimurium" in predict_sero or predict_form == "4:i:-": |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
419 normal = 0 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
420 mutation = 0 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
421 for x in special_gene_list: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
422 if "oafA-O-4_full" in x: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
423 normal = float(special_gene_list[x]) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
424 elif "oafA-O-4_5-" in x: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
425 mutation = float(special_gene_list[x]) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
426 if normal > mutation: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
427 pass |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
428 elif normal < mutation: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
429 #predict_sero = predict_sero.strip() + "(O5-)" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
430 predict_sero = predict_sero.strip() #diable special sero for new output requirement, 04132019 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
431 star = "*" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
432 #star_line = "Detected the deletion of O5-." |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
433 star_line = "Detected a deletion that causes O5- variant of Typhimurium. " |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
434 else: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
435 pass |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
436 #special test for Paratyphi B |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
437 if "Paratyphi B" in predict_sero or predict_form == "4:b:-": |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
438 normal = 0 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
439 mutation = 0 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
440 for x in special_gene_list: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
441 if "gntR-family-regulatory-protein_dt-positive" in x: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
442 normal = float(special_gene_list[x]) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
443 elif "gntR-family-regulatory-protein_dt-negative" in x: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
444 mutation = float(special_gene_list[x]) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
445 #print(normal,mutation) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
446 if normal > mutation: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
447 #predict_sero = predict_sero.strip() + "(dt+)" #diable special sero for new output requirement, 04132019 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
448 predict_sero = predict_sero.strip()+' var. L(+) tartrate+' if "Paratyphi B" in predict_sero else predict_sero.strip() |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
449 star = "*" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
450 #star_line = "Didn't detect the SNP for dt- which means this isolate is a Paratyphi B variant L(+) tartrate(+)." |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
451 star_line = "The SNP that causes d-Tartrate nonfermentating phenotype of Paratyphi B was not detected. " |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
452 elif normal < mutation: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
453 #predict_sero = predict_sero.strip() + "(dt-)" #diable special sero for new output requirement, 04132019 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
454 predict_sero = predict_sero.strip() |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
455 star = "*" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
456 #star_line = "Detected the SNP for dt- which means this isolate is a systemic pathovar of Paratyphi B." |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
457 star_line = "Detected the SNP for d-Tartrate nonfermenting phenotype of Paratyphi B. " |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
458 else: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
459 star = "*" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
460 #star_line = " Failed to detect the SNP for dt-, can't decide it's a Paratyphi B variant L(+) tartrate(+) or not." |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
461 star_line = " " ## ed_SL_05152019: do not report this situation. |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
462 #special test for O13,22 and O13,23 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
463 if Otype=="13": |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
464 #ex_dir = os.path.dirname(os.path.realpath(__file__)) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
465 ex_dir = os.path.abspath(os.path.join(os.path.dirname(os.path.dirname(__file__)),'seqsero2_db')) # ed_SL_09152019 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
466 f = open(ex_dir + '/special.pickle', 'rb') |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
467 special = pickle.load(f) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
468 O22_O23=special['O22_O23'] |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
469 if predict_sero.split(" or ")[0] in O22_O23[-1] and predict_sero.split(" or ")[0] not in rename_dict_all:#if in rename_dict_all, then it means already merged, no need to analyze |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
470 O22_score=0 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
471 O23_score=0 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
472 for x in special_gene_list: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
473 if "O:22" in x: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
474 O22_score = O22_score+float(special_gene_list[x]) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
475 elif "O:23" in x: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
476 O23_score = O23_score+float(special_gene_list[x]) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
477 #print(O22_score,O23_score) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
478 for z in O22_O23[0]: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
479 if predict_sero.split(" or ")[0] in z: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
480 if O22_score > O23_score: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
481 star = "*" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
482 #star_line = "Detected O22 specific genes to further differenciate '"+predict_sero+"'." #diabled for new output requirement, 04132019 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
483 predict_sero = z[0] |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
484 elif O22_score < O23_score: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
485 star = "*" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
486 #star_line = "Detected O23 specific genes to further differenciate '"+predict_sero+"'." #diabled for new output requirement, 04132019 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
487 predict_sero = z[1] |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
488 else: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
489 star = "*" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
490 #star_line = "Fail to detect O22 and O23 differences." #diabled for new output requirement, 04132019 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
491 if " or " in predict_sero: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
492 star_line = star_line + "The predicted serotypes share the same general formula: " + Otype + ":" + fliC + ":" + fljB + "." |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
493 #special test for O6,8 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
494 #merge_O68_list=["Blockley","Bovismorbificans","Hadar","Litchfield","Manhattan","Muenchen"] #remove 11/11/2018, because already in merge list |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
495 #for x in merge_O68_list: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
496 # if x in predict_sero: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
497 # predict_sero=x |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
498 # star="" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
499 # star_line="" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
500 #special test for Montevideo; most of them are monophasic |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
501 #if "Montevideo" in predict_sero and "1,2,7" in predict_form: #remove 11/11/2018, because already in merge list |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
502 #star="*" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
503 #star_line="Montevideo is almost always monophasic, having an antigen called for the fljB position may be a result of Salmonella-Salmonella contamination." |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
504 return predict_form, predict_sero, star, star_line, claim |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
505 ### End of SeqSero Kmer part |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
506 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
507 ### Begin of SeqSero2 allele prediction and output |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
def xml_parse_score_comparision_seqsero(xmlfile):
    """Parse a BLAST XML report and rank every (query, hit) pair by score.

    For each alignment of each BLAST record the HSPs are walked in order.
    Every HSP contributes its bit score and its identity ratio
    (identities / query length), both weighted by the fraction of the HSP's
    query range that was NOT already covered by earlier HSPs of the same
    alignment, so overlapping HSPs are not counted twice.

    Args:
        xmlfile: path of the BLAST XML output file.

    Returns:
        A list of tuples ``(name, score, ids, cover_region)`` sorted by
        ``score`` in descending order, where ``name`` is
        ``"<query>___<hit_def>"`` and ``cover_region`` is the set of query
        positions covered by the alignment's HSPs.
    """
    # Kept as a function-local import, as in the original code.
    from Bio.Blast import NCBIXML

    # NCBIXML.parse is lazy, so materialise the records while the file is
    # still open; the context manager guarantees the file is closed.
    with open(xmlfile) as xml_handle:
        records = list(NCBIXML.parse(xml_handle))

    results = []
    for record in records:
        query_name = record.query.strip()
        for alignment in record.alignments:
            score = 0
            ids = 0
            # Tracking covered positions prevents repeated counting that
            # would push the identity percentage above 1.
            cover_region = set()
            for hsp in alignment.hsps:
                hsp_region = set(range(hsp.query_start, hsp.query_end))
                if cover_region:
                    # Only the not-yet-covered part of this HSP counts.
                    overlap = len(cover_region & hsp_region)
                    fraction = 1 - overlap / float(len(hsp_region))
                else:
                    fraction = 1
                cover_region = cover_region | hsp_region
                # The original had separate branches for "first"/"last"
                # queries, but both performed exactly this computation.
                score += hsp.bits * fraction
                ids += float(hsp.identities) / record.query_length * fraction
            results.append(
                (query_name + "___" + alignment.hit_def, score, ids, cover_region)
            )

    Final_list = sorted(results, key=lambda d: d[1], reverse=True)
    return Final_list
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
546 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
547 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
def Uniq(L, sort_on_fre="none"):
    """Return the unique values of ``L`` and how often each one occurs.

    Note: ``L`` is sorted in place (a side effect inherited from the
    original implementation and kept for backward compatibility).

    Args:
        L: a list of mutually comparable items.
        sort_on_fre: when left at ``"none"`` the unique values are returned
            in sorted order; any other value orders them by ascending
            count (ties broken by value).

    Returns:
        A tuple ``(values, counts)`` of two parallel lists.
    """
    L.sort()
    values = []
    count = []
    # After sorting, equal items are adjacent, so one pass is enough to
    # collect each distinct value together with its frequency.
    for item in L:
        if values and values[-1] == item:
            count[-1] += 1
        else:
            values.append(item)
            count.append(1)
    if sort_on_fre != "none" and values:
        # zip() returns an iterator on Python 3 and cannot be indexed, so
        # build the two result lists explicitly from the sorted pairs.
        ranked = sorted(zip(count, values))
        count = [pair[0] for pair in ranked]
        values = [pair[1] for pair in ranked]
    return (values, count)
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
564 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
def judge_fliC_or_fljB_from_head_tail_for_one_contig(nodes_vs_score_list):
    """Decide whether one contig looks like fliC or fljB from its scores.

    Each entry of ``nodes_vs_score_list`` is indexed as ``entry[0]`` (a
    name) and ``entry[1]`` (a score). Scores are summed separately for
    names containing "fliC" and, failing that, names containing "fljB".

    Returns:
        ``(role, difference)``: ``role`` is "fliC" when the fliC total is
        greater than or equal to the fljB total, otherwise "fljB";
        ``difference`` is the absolute gap between the totals. A small gap
        means the call is unreliable and the whole-contig BLAST score
        should be used instead.
    """
    totals = {"fliC": 0, "fljB": 0}
    for entry in nodes_vs_score_list:
        label = entry[0]
        # "fliC" is checked first; an entry is credited to one gene only.
        for gene in ("fliC", "fljB"):
            if gene in label:
                totals[gene] += entry[1]
                break
    role = "fliC" if totals["fliC"] >= totals["fljB"] else "fljB"
    return (role, abs(totals["fliC"] - totals["fljB"]))
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
581 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
def judge_fliC_or_fljB_from_whole_contig_blast_score_ranking(node_name,Final_list,Final_list_passed):
    """Pick the role of a contig from the first passed hit that names it.

    Used when the head/tail score difference is too small to trust, or
    when neither head nor tail produced a BLAST score for the contig.

    Args:
        node_name: contig name to look for inside each hit name.
        Final_list: not used by this function.
        Final_list_passed: sequence of hits; ``hit[0]`` is the hit name.

    Returns:
        The part of the first matching hit name before its first "_",
        or an empty string when no hit name contains ``node_name``.
    """
    matching_roles = (
        hit[0].split("_")[0]
        for hit in Final_list_passed
        if node_name in hit[0]
    )
    # Only the first match matters; fall back to "" when there is none.
    return next(matching_roles, "")
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
591 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
def fliC_or_fljB_judge_from_head_tail_sequence(nodes_list,tail_head_list,Final_list,Final_list_passed):
    """Assign a role (expected to be "fliC" or "fljB") to every contig in nodes_list.

    For each contig, the head/tail hits whose name contains the contig name
    are collected. With one to four such hits the accumulated head/tail
    scores decide the role; if their difference is below 20 the decision is
    considered too close and the whole-contig ranking is used instead. With
    no such hit (or more than four) the whole-contig ranking is used directly.

    The original had four identical branches for 4, 3, 2 and 1 hits; they are
    merged here. The two-hit branch also collected gene prefixes and passed
    them to Uniq, but the result was never used, so that step is dropped.

    Args:
        nodes_list: unique contig names (the first value returned by Uniq in
            decide_contig_roles_for_H_antigen).
        tail_head_list: hit records for the head/tail probe sequences; the
            first element of each record is the hit name.
        Final_list: all hit records, forwarded to the ranking helper.
        Final_list_passed: passed hit records, forwarded to the ranking helper.

    Returns:
        A list of (role, contig_name) tuples in nodes_list order.
    """
    role_list = []
    for node in nodes_list:
        head_tail_hits = [hit for hit in tail_head_list if node in hit[0]]
        role = ""
        needs_ranking = True
        if 1 <= len(head_tail_hits) <= 4:
            role, diff = judge_fliC_or_fljB_from_head_tail_for_one_contig(head_tail_hits)
            # A margin under 20 is treated as a tie between the two phases.
            needs_ranking = diff < 20
        if needs_ranking:
            role = judge_fliC_or_fljB_from_whole_contig_blast_score_ranking(node, Final_list, Final_list_passed)
        role_list.append((role, node))
    # Most common error: two antigens were assigned to the same phase.
    # In that case redo both contigs using only the whole-contig ranking.
    if len(role_list) == 2 and role_list[0][0] == role_list[1][0]:
        role_list = [
            (judge_fliC_or_fljB_from_whole_contig_blast_score_ranking(node, Final_list, Final_list_passed), node)
            for node in nodes_list
        ]
    return role_list
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
648 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
def decide_contig_roles_for_H_antigen(Final_list,Final_list_passed):
    """Decide which contig is fliC and which one is fljB.

    Contig names are read from the passed flagellar hits (names starting with
    "fl" that contain neither "last" nor "first"), de-duplicated with Uniq,
    and handed to fliC_or_fljB_judge_from_head_tail_sequence together with
    every hit in Final_list whose name contains "last" or "first".

    Args:
        Final_list: all hit records; the first element is the hit name.
        Final_list_passed: passed hit records with the same layout.

    Returns:
        Whatever fliC_or_fljB_judge_from_head_tail_sequence returns for the
        collected contigs.
    """
    h_nodes = []
    for hit in Final_list_passed:
        hit_name = hit[0]
        if not hit_name.startswith("fl"):
            continue
        if "last" in hit_name or "first" in hit_name:
            continue
        # The contig name is the part after the "___" separator.
        h_nodes.append(hit_name.split("___")[1].strip())
    # Second value from Uniq is not needed here (presumably per-item counts).
    node_list, _unused = Uniq(h_nodes)
    tail_head_list = []
    for hit in Final_list:
        if "last" in hit[0] or "first" in hit[0]:
            tail_head_list.append(hit)
    return fliC_or_fljB_judge_from_head_tail_sequence(node_list, tail_head_list, Final_list, Final_list_passed)
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
661 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
662 def decide_O_type_and_get_special_genes(Final_list,Final_list_passed): |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
663 #decide O based on Final_list |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
664 O_choice="?" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
665 O_list=[] |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
666 special_genes={} |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
667 nodes=[] |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
668 for x in Final_list_passed: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
669 if x[0].startswith("O-"): |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
670 nodes.append(x[0].split("___")[1].strip()) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
671 elif not x[0].startswith("fl"): |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
672 special_genes[x[0]]=x[2]#08172018, x[2] changed from x[-1] |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
673 #print "special_genes:",special_genes |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
674 c,d=Uniq(nodes) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
675 #print "potential O antigen contig",c |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
676 final_O=[] |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
677 O_nodes_list=[] |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
678 for x in c:#c is the list for contigs |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
679 temp=0 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
680 for y in Final_list_passed: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
681 if x in y[0] and y[0].startswith("O-"): |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
682 final_O.append(y) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
683 break |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
684 ### O contig has the problem of two genes on same contig, so do additional test |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
685 potenial_new_gene="" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
686 for x in final_O: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
687 pointer=0 #for genes merged or not |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
688 #not consider O-1,3,19_not_in_3,10, too short compared with others |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
689 if "O-1,3,19_not_in_3,10" not in x[0] and int(x[0].split("__")[1].split("___")[0])*x[2]+850 <= int(x[0].split("length_")[1].split("_")[0]):#gene length << contig length; for now give 300*2 (for secureity can use 400*2) as flank region |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
690 pointer=x[0].split("___")[1].strip()#store the contig name |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
691 print(pointer) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
692 if pointer!=0:#it has potential merge event |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
693 for y in Final_list: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
694 if pointer in y[0] and y not in final_O and (y[1]>=int(y[0].split("__")[1].split("___")[0])*1.5 or (y[1]>=int(y[0].split("__")[1].split("___")[0])*y[2] and y[1]>=400)):#that's a realtively strict filter now; if passed, it has merge event and add one more to final_O |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
695 potenial_new_gene=y |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
696 #print(potenial_new_gene) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
697 break |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
698 if potenial_new_gene!="": |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
699 print("two differnt genes in same contig, fix it for O antigen") |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
700 print(potenial_new_gene[:3]) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
701 pointer=0 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
702 for y in final_O: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
703 if y[0].split("___")[-1]==potenial_new_gene[0].split("___")[-1]: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
704 pointer=1 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
705 if pointer!=0: #changed to consider two genes in same contig |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
706 final_O.append(potenial_new_gene) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
707 ### end of the two genes on same contig test |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
708 final_O=sorted(final_O,key=lambda x: x[2], reverse=True)#sorted |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
709 if len(final_O)==0 or (len(final_O)==1 and "O-1,3,19_not_in_3,10" in final_O[0][0]): |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
710 #print "$$$No Otype, due to no hit"#may need to be changed |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
711 O_choice="-" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
712 else: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
713 highest_O_coverage=max([float(x[0].split("_cov_")[-1].split("_")[0]) for x in final_O if "O-1,3,19_not_in_3,10" not in x[0]]) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
714 O_list=[] |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
715 O_list_less_contamination=[] |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
716 for x in final_O: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
717 if not "O-1,3,19_not_in_3,10__130" in x[0]:#O-1,3,19_not_in_3,10 is too small, which may affect further analysis; to avoid contamination affect, use 0.15 of highest coverage as cut-off |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
718 O_list.append(x[0].split("__")[0]) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
719 O_nodes_list.append(x[0].split("___")[1]) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
720 if float(x[0].split("_cov_")[-1].split("_")[0])>highest_O_coverage*0.15: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
721 O_list_less_contamination.append(x[0].split("__")[0]) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
722 ### special test for O9,46 and O3,10 family |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
723 if ("O-9,46_wbaV" in O_list or "O-9,46_wbaV-from-II-9,12:z29:1,5-SRR1346254" in O_list) and O_list_less_contamination[0].startswith("O-9,"):#not sure should use and float(O9_wbaV)/float(num_1) > 0.1 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
724 if "O-9,46_wzy" in O_list or "O-9,46_wzy_partial" in O_list:#and float(O946_wzy)/float(num_1) > 0.1 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
725 O_choice="O-9,46" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
726 #print "$$$Most possilble Otype: O-9,46" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
727 elif "O-9,46,27_partial_wzy" in O_list:#and float(O94627)/float(num_1) > 0.1 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
728 O_choice="O-9,46,27" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
729 #print "$$$Most possilble Otype: O-9,46,27" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
730 else: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
731 O_choice="O-9"#next, detect O9 vs O2? |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
732 O2=0 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
733 O9=0 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
734 for z in special_genes: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
735 if "tyr-O-9" in z: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
736 O9=special_genes[z] |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
737 elif "tyr-O-2" in z: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
738 O2=special_genes[z] |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
739 if O2>O9: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
740 O_choice="O-2" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
741 elif O2<O9: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
742 pass |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
743 else: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
744 pass |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
745 #print "$$$No suitable one, because can't distinct it's O-9 or O-2, but O-9 has a more possibility." |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
746 elif ("O-3,10_wzx" in O_list) and ("O-9,46_wzy" in O_list) and (O_list[0].startswith("O-3,10") or O_list_less_contamination[0].startswith("O-9,46_wzy")):#and float(O310_wzx)/float(num_1) > 0.1 and float(O946_wzy)/float(num_1) > 0.1 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
747 if "O-3,10_not_in_1,3,19" in O_list:#and float(O310_no_1319)/float(num_1) > 0.1 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
748 O_choice="O-3,10" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
749 #print "$$$Most possilble Otype: O-3,10 (contain O-3,10_not_in_1,3,19)" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
750 else: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
751 O_choice="O-1,3,19" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
752 #print "$$$Most possilble Otype: O-1,3,19 (not contain O-3,10_not_in_1,3,19)" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
753 ### end of special test for O9,46 and O3,10 family |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
754 else: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
755 try: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
756 max_score=0 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
757 for x in final_O: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
758 if x[2]>=max_score and float(x[0].split("_cov_")[-1].split("_")[0])>highest_O_coverage*0.15:#use x[2],08172018, the "coverage identity = cover_length * identity"; also meet coverage threshold |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
759 max_score=x[2]#change from x[-1] to x[2],08172018 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
760 O_choice=x[0].split("_")[0] |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
761 if O_choice=="O-1,3,19": |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
762 O_choice=final_O[1][0].split("_")[0] |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
763 #print "$$$Most possilble Otype: ",O_choice |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
764 except: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
765 pass |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
766 #print "$$$No suitable Otype, or failure of mapping (please check the quality of raw reads)" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
767 if O_choice=="O-9,46,27" and len(O_list)==2 and "O-4_wzx" in O_list: #special for very low chance sitatuion between O4 and O9,27,46, this is for serotypes like Bredeney and Schwarzengrund (normallly O-4 will have higher score, but sometimes sequencing quality may affect the prediction) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
768 O_choice="O-4" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
769 #print "O:",O_choice,O_nodes_list |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
770 Otypes=[] |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
771 for x in O_list: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
772 if x!="O-1,3,19_not_in_3,10": |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
773 if "O-9,46_" not in x: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
774 Otypes.append(x.split("_")[0]) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
775 else: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
776 Otypes.append(x.split("-from")[0])#O-9,46_wbaV-from-II-9,12:z29:1,5-SRR1346254 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
777 #Otypes=[x.split("_")[0] for x in O_list if x!="O-1,3,19_not_in_3,10"] |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
778 Otypes_uniq,Otypes_fre=Uniq(Otypes) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
779 contamination_O="" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
780 if O_choice=="O-9,46,27" or O_choice=="O-3,10" or O_choice=="O-1,3,19": |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
781 if len(Otypes_uniq)>2: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
782 contamination_O="potential contamination from O antigen signals" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
783 else: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
784 if len(Otypes_uniq)>1: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
785 if O_choice=="O-4" and len(Otypes_uniq)==2 and "O-9,46,27" in Otypes_uniq: #for special 4,12,27 case such as Bredeney and Schwarzengrund |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
786 contamination_O="" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
787 elif O_choice=="O-9,46" and len(Otypes_uniq)==2 and "O-9,46_wbaV" in Otypes_uniq and "O-9,46_wzy" in Otypes_uniq: #for special 4,12,27 case such as Bredeney and Schwarzengrund |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
788 contamination_O="" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
789 else: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
790 contamination_O="potential contamination from O antigen signals" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
791 return O_choice,O_nodes_list,special_genes,final_O,contamination_O,Otypes_uniq |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
792 ### End of SeqSero2 allele prediction and output |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
793 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
def get_input_files(make_dir,input_file,data_type,dirpath):
    """Derive the forward/reverse read file names for a run, converting input when needed.

    The function temporarily changes into ``make_dir`` so that any files it
    generates (fastq-dump output, de-interleaved reads) are written there.

    data_type codes (as strings):
        '1' paired-end reads, interleaved   '2' paired-end reads, separated
        '3' single-end reads                '4' assembly
        '5' nanopore fasta                  '6' nanopore fastq

    Args:
        make_dir: directory to work in; it must already exist.
        input_file: sequence of input paths; only the base name of each entry
            is used, so the files are expected to be reachable from make_dir.
        data_type: one of the string codes listed above.
        dirpath: directory that contains ``deinterleave_fastq.sh``.

    Returns:
        Tuple ``(for_fq, rev_fq)`` of file names relative to make_dir.
        ``rev_fq`` is "" when there is no reverse file; both are "" for an
        unrecognised data_type.

    Raises:
        subprocess.CalledProcessError: an external command exited non-zero.
        OSError: make_dir cannot be entered or an executable is missing.
    """
    import shlex  # function-scope import, used only to quote shell arguments

    def _basename(path):
        # Same rule as before: keep whatever follows the last "/".
        return path.split("/")[-1]

    def _sra_to_fastq(name,mate):
        # Swap only the trailing ".sra"; str.replace would also rewrite a
        # ".sra" that happens to occur earlier in the file name.
        return name[:-len(".sra")]+"_"+mate+".fastq"

    for_fq=""
    rev_fq=""
    # Remember where we started: the previous os.chdir("..") only returned to
    # the caller's directory when make_dir was a single-level relative path.
    start_dir=os.getcwd()
    os.chdir(make_dir)
    try:
        if data_type=="1":
            input_file=_basename(input_file[0])
            if input_file.endswith(".sra"):
                # Argument list instead of a shell string: no quoting issues.
                subprocess.check_call(["fastq-dump","--split-files",input_file])
                for_fq=_sra_to_fastq(input_file,"1")
                rev_fq=_sra_to_fastq(input_file,"2")
            else:
                core_id=input_file.split(".fastq")[0].split(".fq")[0]
                for_fq=core_id+"_1.fastq"
                rev_fq=core_id+"_2.fastq"
                # Stream the (optionally gzipped) interleaved reads into the
                # de-interleave script; the pipe needs a shell, so every
                # file name is quoted before it is placed on the command line.
                reader="gzip -dc " if input_file.endswith(".gz") else "cat "
                command=(reader+shlex.quote(input_file)+" | "
                         +shlex.quote(dirpath+"/deinterleave_fastq.sh")+" "
                         +shlex.quote(for_fq)+" "+shlex.quote(rev_fq))
                subprocess.check_call(command,shell=True)
        elif data_type=="2":
            for_fq=_basename(input_file[0])
            rev_fq=_basename(input_file[1])
        elif data_type=="3":
            input_file=_basename(input_file[0])
            if input_file.endswith(".sra"):
                subprocess.check_call(["fastq-dump","--split-files",input_file])
                for_fq=_sra_to_fastq(input_file,"1")
            else:
                for_fq=input_file
        elif data_type in ["4","5","6"]:
            for_fq=_basename(input_file[0])
    finally:
        # Restore the working directory even when an external tool fails.
        os.chdir(start_dir)
    return for_fq,rev_fq
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
828 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
829 def predict_O_and_H_types(Final_list,Final_list_passed,new_fasta): |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
830 #get O and H types from Final_list from blast parsing; allele mode |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
831 from Bio import SeqIO |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
832 fliC_choice="-" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
833 fljB_choice="-" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
834 fliC_contig="NA" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
835 fljB_contig="NA" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
836 fliC_region=set([0]) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
837 fljB_region=set([0,]) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
838 fliC_length=0 #can be changed to coverage in future; in 03292019, changed to ailgned length |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
839 fljB_length=0 #can be changed to coverage in future; in 03292019, changed to ailgned length |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
840 O_choice="-"#no need to decide O contig for now, should be only one |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
841 O_choice,O_nodes,special_gene_list,O_nodes_roles,contamination_O,Otypes_uniq=decide_O_type_and_get_special_genes(Final_list,Final_list_passed)#decide the O antigen type and also return special-gene-list for further identification |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
842 O_choice=O_choice.split("-")[-1].strip() |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
843 if (O_choice=="1,3,19" and len(O_nodes_roles)==1 and "1,3,19" in O_nodes_roles[0][0]) or O_choice=="": |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
844 O_choice="-" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
845 H_contig_roles=decide_contig_roles_for_H_antigen(Final_list,Final_list_passed)#decide the H antigen contig is fliC or fljB |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
846 #add alignment locations, used for further selection, 03312019 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
847 for i in range(len(H_contig_roles)): |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
848 x=H_contig_roles[i] |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
849 for y in Final_list_passed: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
850 if x[1] in y[0] and y[0].startswith(x[0]): |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
851 H_contig_roles[i]+=H_contig_roles[i]+(y[-1],) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
852 break |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
853 log_file=open("SeqSero_log.txt","a") |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
854 extract_file=open("Extracted_antigen_alleles.fasta","a") |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
855 handle_fasta=list(SeqIO.parse(new_fasta,"fasta")) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
856 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
857 #print("O_contigs:") |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
858 log_file.write("O_contigs:\n") |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
859 extract_file.write("#Sequences with antigen signals (if the micro-assembled contig only covers the flanking region, it will not be used for contamination analysis)\n") |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
860 extract_file.write("#O_contigs:\n") |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
861 for x in O_nodes_roles: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
862 if "O-1,3,19_not_in_3,10" not in x[0]:#O-1,3,19_not_in_3,10 is just a small size marker |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
863 #print(x[0].split("___")[-1],x[0].split("__")[0],"blast score:",x[1],"identity%:",str(round(x[2]*100,2))+"%",str(min(x[-1]))+" to "+str(max(x[-1]))) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
864 log_file.write(x[0].split("___")[-1]+" "+x[0].split("__")[0]+"; "+"blast score: "+str(x[1])+" identity%: "+str(round(x[2]*100,2))+"%; alignment from "+str(min(x[-1]))+" to "+str(max(x[-1]))+" of antigen\n") |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
865 title=">"+x[0].split("___")[-1]+" "+x[0].split("__")[0]+"; "+"blast score: "+str(x[1])+" identity%: "+str(round(x[2]*100,2))+"%; alignment from "+str(min(x[-1]))+" to "+str(max(x[-1]))+" of antigen\n" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
866 seqs="" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
867 for z in handle_fasta: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
868 if x[0].split("___")[-1]==z.description: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
869 seqs=str(z.seq) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
870 extract_file.write(title+seqs+"\n") |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
871 if len(H_contig_roles)!=0: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
872 highest_H_coverage=max([float(x[1].split("_cov_")[-1].split("_")[0]) for x in H_contig_roles]) #less than highest*0.1 would be regarded as contamination and noises, they will still be considered in contamination detection and logs, but not used as final serotype output |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
873 else: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
874 highest_H_coverage=0 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
875 for x in H_contig_roles: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
876 #if multiple choices, temporately select the one with longest length for now, will revise in further change |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
877 if "fliC" == x[0] and len(x[-1])>=fliC_length and x[1] not in O_nodes and float(x[1].split("_cov_")[-1].split("_")[0])>highest_H_coverage*0.13:#remember to avoid the effect of O-type contig, so should not in O_node list |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
878 fliC_contig=x[1] |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
879 fliC_length=len(x[-1]) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
880 elif "fljB" == x[0] and len(x[-1])>=fljB_length and x[1] not in O_nodes and float(x[1].split("_cov_")[-1].split("_")[0])>highest_H_coverage*0.13: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
881 fljB_contig=x[1] |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
882 fljB_length=len(x[-1]) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
883 for x in Final_list_passed: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
884 if fliC_choice=="-" and "fliC_" in x[0] and fliC_contig in x[0]: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
885 fliC_choice=x[0].split("_")[1] |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
886 elif fljB_choice=="-" and "fljB_" in x[0] and fljB_contig in x[0]: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
887 fljB_choice=x[0].split("_")[1] |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
888 elif fliC_choice!="-" and fljB_choice!="-": |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
889 break |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
890 #now remove contigs not in middle core part |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
891 first_allele="NA" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
892 first_allele_percentage=0 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
893 for x in Final_list: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
894 if x[0].startswith("fliC") or x[0].startswith("fljB"): |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
895 first_allele=x[0].split("__")[0] #used to filter those un-middle contigs |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
896 first_allele_percentage=x[2] |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
897 break |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
898 additional_contigs=[] |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
899 for x in Final_list: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
900 if first_allele in x[0]: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
901 if (fliC_contig == x[0].split("___")[-1]): |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
902 fliC_region=x[3] |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
903 elif fljB_contig!="NA" and (fljB_contig == x[0].split("___")[-1]): |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
904 fljB_region=x[3] |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
905 else: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
906 if x[1]*1.1>int(x[0].split("___")[1].split("_")[3]):#loose threshold by multiplying 1.1 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
907 additional_contigs.append(x) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
908 #else: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
909 #print x[:3] |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
910 #we can just use the fljB region (or fliC depends on size), no matter set() or contain a large locations (without middle part); however, if none of them is fully assembled, use 500 and 1200 as conservative cut-off |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
911 if first_allele_percentage>0.9: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
912 if len(fliC_region)>len(fljB_region) and (max(fljB_region)-min(fljB_region))>1000: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
913 target_region=fljB_region|(fliC_region-set(range(min(fljB_region),max(fljB_region)))) #fljB_region|(fliC_region-set(range(min(fljB_region),max(fljB_region)))) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
914 elif len(fliC_region)<len(fljB_region) and (max(fliC_region)-min(fliC_region))>1000: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
915 target_region=fliC_region|(fljB_region-set(range(min(fliC_region),max(fliC_region)))) #fljB_region|(fliC_region-set(range(min(fljB_region),max(fljB_region)))) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
916 else: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
917 target_region=set()#doesn't do anything |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
918 else: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
919 target_region=set()#doesn't do anything |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
920 #print(target_region) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
921 #print(additional_contigs) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
922 target_region2=set(list(range(0,525))+list(range(1200,1700)))#I found to use 500 to 1200 as special region would be best |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
923 target_region=target_region2|target_region |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
924 for x in additional_contigs: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
925 removal=0 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
926 contig_length=int(x[0].split("___")[1].split("length_")[-1].split("_")[0]) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
927 if fljB_contig not in x[0] and fliC_contig not in x[0] and len(target_region&x[3])/float(len(x[3]))>0.65 and contig_length*0.5<len(x[3])<contig_length*1.5: #consider length and alignment length for now, but very loose,0.5 and 1.5 as cut-off |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
928 removal=1 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
929 else: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
930 if first_allele_percentage > 0.9 and float(x[0].split("__")[1].split("___")[0])*x[2]/len(x[-1])>0.96:#if high similiarity with middle part of first allele (first allele >0.9, already cover middle part) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
931 removal=1 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
932 else: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
933 pass |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
934 if removal==1: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
935 for y in H_contig_roles: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
936 if y[1] in x[0]: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
937 H_contig_roles.remove(y) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
938 else: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
939 pass |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
940 #print(x[:3],contig_length,len(target_region&x[3])/float(len(x[3])),contig_length*0.5,len(x[3]),contig_length*1.5) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
941 #end of removing none-middle contigs |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
942 #print("H_contigs:") |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
943 log_file.write("H_contigs:\n") |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
944 extract_file.write("#H_contigs:\n") |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
945 H_contig_stat=[] |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
946 H1_cont_stat={} |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
947 H2_cont_stat={} |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
948 for i in range(len(H_contig_roles)): |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
949 x=H_contig_roles[i] |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
950 a=0 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
951 for y in Final_list_passed: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
952 if x[1] in y[0] and y[0].startswith(x[0]): |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
953 if "first" in y[0] or "last" in y[0]: #this is the final filter to decide it's fliC or fljB, if can't pass, then can't decide |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
954 for y in Final_list_passed: #it's impossible to has the "first" and "last" allele as prediction, so re-do it |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
955 if x[1] in y[0]:#it's very possible to be third phase allele, so no need to make it must be fliC or fljB |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
956 #print(x[1],"can't_decide_fliC_or_fljB",y[0].split("_")[1],"blast_score:",y[1],"identity%:",str(round(y[2]*100,2))+"%",str(min(y[-1]))+" to "+str(max(y[-1]))) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
957 log_file.write(x[1]+" "+x[0]+" "+y[0].split("_")[1]+"; "+"blast score: "+str(y[1])+" identity%: "+str(round(y[2]*100,2))+"%; alignment from "+str(min(y[-1]))+" to "+str(max(y[-1]))+" of antigen\n") |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
958 H_contig_roles[i]="can't decide fliC or fljB, may be third phase" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
959 title=">"+x[1]+" "+x[0]+" "+y[0].split("_")[1]+"; "+"blast score: "+str(y[1])+" identity%: "+str(round(y[2]*100,2))+"%; alignment from "+str(min(y[-1]))+" to "+str(max(y[-1]))+" of antiten\n" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
960 seqs="" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
961 for z in handle_fasta: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
962 if x[1]==z.description: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
963 seqs=str(z.seq) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
964 extract_file.write(title+seqs+"\n") |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
965 break |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
966 else: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
967 #print(x[1],x[0],y[0].split("_")[1],"blast_score:",y[1],"identity%:",str(round(y[2]*100,2))+"%",str(min(y[-1]))+" to "+str(max(y[-1]))) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
968 log_file.write(x[1]+" "+x[0]+" "+y[0].split("_")[1]+"; "+"blast score: "+str(y[1])+" identity%: "+str(round(y[2]*100,2))+"%; alignment from "+str(min(y[-1]))+" to "+str(max(y[-1]))+" of antigen\n") |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
969 title=">"+x[1]+" "+x[0]+" "+y[0].split("_")[1]+"; "+"blast score: "+str(y[1])+" identity%: "+str(round(y[2]*100,2))+"%; alignment from "+str(min(y[-1]))+" to "+str(max(y[-1]))+" of antigen\n" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
970 seqs="" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
971 for z in handle_fasta: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
972 if x[1]==z.description: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
973 seqs=str(z.seq) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
974 extract_file.write(title+seqs+"\n") |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
975 if x[0]=="fliC": |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
976 if y[0].split("_")[1] not in H1_cont_stat: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
977 H1_cont_stat[y[0].split("_")[1]]=y[2] |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
978 else: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
979 H1_cont_stat[y[0].split("_")[1]]+=y[2] |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
980 if x[0]=="fljB": |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
981 if y[0].split("_")[1] not in H2_cont_stat: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
982 H2_cont_stat[y[0].split("_")[1]]=y[2] |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
983 else: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
984 H2_cont_stat[y[0].split("_")[1]]+=y[2] |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
985 break |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
986 #detect contaminations |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
987 #print(H1_cont_stat) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
988 #print(H2_cont_stat) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
989 H1_cont_stat_list=[x for x in H1_cont_stat if H1_cont_stat[x]>0.2] |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
990 H2_cont_stat_list=[x for x in H2_cont_stat if H2_cont_stat[x]>0.2] |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
991 contamination_H="" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
992 if len(H1_cont_stat_list)>1 or len(H2_cont_stat_list)>1: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
993 contamination_H="potential contamination from H antigen signals" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
994 elif len(H2_cont_stat_list)==1 and fljB_contig=="NA": |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
995 contamination_H="potential contamination from H antigen signals, uncommon weak fljB signals detected" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
996 #get additional antigens |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
997 """ |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
998 if ("O-9,46_wbaV" in O_list or "O-9,46_wbaV-from-II-9,12:z29:1,5-SRR1346254" in O_list) and O_list_less_contamination[0].startswith("O-9,"):#not sure should use and float(O9_wbaV)/float(num_1) > 0.1 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
999 if "O-9,46_wzy" in O_list:#and float(O946_wzy)/float(num_1) > 0.1 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1000 O_choice="O-9,46" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1001 #print "$$$Most possilble Otype: O-9,46" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1002 elif "O-9,46,27_partial_wzy" in O_list:#and float(O94627)/float(num_1) > 0.1 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1003 O_choice="O-9,46,27" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1004 #print "$$$Most possilble Otype: O-9,46,27" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1005 elif ("O-3,10_wzx" in O_list) and ("O-9,46_wzy" in O_list) and (O_list[0].startswith("O-3,10") or O_list_less_contamination[0].startswith("O-9,46_wzy")):#and float(O310_wzx)/float(num_1) > 0.1 and float(O946_wzy)/float(num_1) > 0.1 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1006 if "O-3,10_not_in_1,3,19" in O_list:#and float(O310_no_1319)/float(num_1) > 0.1 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1007 O_choice="O-3,10" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1008 #print "$$$Most possilble Otype: O-3,10 (contain O-3,10_not_in_1,3,19)" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1009 else: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1010 O_choice="O-1,3,19" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1011 #print "$$$Most possilble Otype: O-1,3,19 (not contain O-3,10_not_in_1,3,19)" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1012 ### end of special test for O9,46 and O3,10 family |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1013 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1014 if O_choice=="O-9,46,27" or O_choice=="O-3,10" or O_choice=="O-1,3,19": |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1015 if len(Otypes_uniq)>2: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1016 contamination_O="potential contamination from O antigen signals" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1017 else: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1018 if len(Otypes_uniq)>1: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1019 if O_choice=="O-4" and len(Otypes_uniq)==2 and "O-9,46,27" in Otypes_uniq: #for special 4,12,27 case such as Bredeney and Schwarzengrund |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1020 contamination_O="" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1021 elif O_choice=="O-9,46" and len(Otypes_uniq)==2 and "O-9,46_wbaV" in Otypes_uniq and "O-9,46_wzy" in Otypes_uniq: #for special 4,12,27 case such as Bredeney and Schwarzengrund |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1022 contamination_O="" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1023 """ |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1024 additonal_antigents=[] |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1025 #print(contamination_O) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1026 #print(contamination_H) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1027 log_file.write(contamination_O+"\n") |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1028 log_file.write(contamination_H+"\n") |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1029 log_file.close() |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1030 return O_choice,fliC_choice,fljB_choice,special_gene_list,contamination_O,contamination_H,Otypes_uniq,H1_cont_stat_list,H2_cont_stat_list |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1031 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
def get_input_K(input_file,lib_dict,data_type,k_size):
    """Kmer mode: run the kmerizer that matches ``data_type`` on the input.

    Every k-mer stored in ``lib_dict`` is pooled into a single set, which is
    handed to the selected kmerizer together with ``input_file`` and
    ``k_size``.

    Args:
        input_file: forwarded unchanged to the kmerizer.
        lib_dict: mapping whose values are iterables of hashable k-mers.
        data_type: string code selecting the kmerizer:
            '4' -> target_multifasta_kmerizer,
            '1', '2', '3' -> target_read_kmerizer,
            '5' -> minion_fasta_kmerizer (minion_2d_fasta),
            '6' -> minion_fastq_kmerizer (minion_2d_fastq).
        k_size: forwarded unchanged to the kmerizer.

    Returns:
        Whatever the selected kmerizer returns.

    Raises:
        ValueError: if ``data_type`` is not one of the codes listed above.
            (Previously this surfaced as an UnboundLocalError on return.)
    """
    # Pool all library k-mers once; duplicates across alleles collapse here.
    target_kmers = set()
    for allele in lib_dict:
        target_kmers.update(lib_dict[allele])

    if data_type == '4':
        return target_multifasta_kmerizer(input_file, k_size, target_kmers)
    if data_type in ('1', '2', '3'):  # set it for now, will change later
        return target_read_kmerizer(input_file, k_size, target_kmers)
    if data_type == '5':  # minion_2d_fasta
        return minion_fasta_kmerizer(input_file, k_size, target_kmers)
    if data_type == '6':  # minion_2d_fastq
        return minion_fastq_kmerizer(input_file, k_size, target_kmers)
    raise ValueError("unsupported data_type for kmer mode: %r" % (data_type,))
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1046 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
def get_kmer_dict(lib_dict,input_Ks):
    """Kmer mode: score each library entry against the input k-mers.

    For every key ``h`` in ``lib_dict`` the score is the percentage of that
    entry's k-mers that also occur in ``input_Ks``. Entries are then sorted
    into three result dicts by key prefix and score:

    * keys starting with ``'O-'`` and score > 25 go to the O dict,
    * keys starting with ``'fl'`` and score > 40 go to the H dict,
    * all other keys with score > 1 go to the special dict.

    Args:
        lib_dict: mapping of entry name -> set of k-mers.
        input_Ks: set of k-mers found in the input.

    Returns:
        Tuple ``(O_dict, H_dict, Special_dict)``, each mapping entry name to
        its percentage score (float).
    """
    O_dict = {}
    H_dict = {}
    Special_dict = {}
    for h in lib_dict:
        lib_kmers = lib_dict[h]
        if not lib_kmers:
            # An empty entry cannot match anything; skipping it avoids the
            # division by zero the percentage below would otherwise hit.
            continue
        score = (len(lib_kmers & input_Ks) / len(lib_kmers)) * 100
        # Arbitrary cut-off for similarity score; very low but seems
        # necessary to detect O-3,10 in some cases.
        if score <= 1:
            continue
        if h.startswith('O-'):
            if score > 25:
                O_dict[h] = score
        elif h.startswith('fl'):
            if score > 40:
                H_dict[h] = score
        else:
            Special_dict[h] = score
    return O_dict,H_dict,Special_dict
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1062 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1063 def call_O_and_H_type(O_dict,H_dict,Special_dict,make_dir): |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1064 log_file=open("SeqSero_log.txt","a") |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1065 log_file.write("O_scores:\n") |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1066 #call O: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1067 highest_O = '-' |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1068 if len(O_dict) == 0: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1069 pass |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1070 else: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1071 for x in O_dict: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1072 log_file.write(x+"\t"+str(O_dict[x])+"\n") |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1073 if ('O-9,46_wbaV__1002' in O_dict and O_dict['O-9,46_wbaV__1002']>70) or ("O-9,46_wbaV-from-II-9,12:z29:1,5-SRR1346254__1002" in O_dict and O_dict['O-9,46_wbaV-from-II-9,12:z29:1,5-SRR1346254__1002']>70): # not sure should use and float(O9_wbaV)/float(num_1) > 0.1 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1074 #if 'O-9,46_wzy__1191' in O_dict or "O-9,46_wzy_partial__216" in O_dict: # and float(O946_wzy)/float(num_1) > 0.1 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1075 #modified to fix miscall of O-9,46 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1076 if ('O-9,46_wzy__1191' in O_dict and O_dict['O-9,46_wzy__1191']>40) or ("O-9,46_wzy_partial__216" in O_dict and O_dict["O-9,46_wzy_partial__216"]>40): # and float(O946_wzy)/float(num_1) > 0.1 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1077 highest_O = "O-9,46" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1078 elif "O-9,46,27_partial_wzy__1019" in O_dict: # and float(O94627)/float(num_1) > 0.1 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1079 highest_O = "O-9,46,27" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1080 else: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1081 highest_O = "O-9" # next, detect O9 vs O2? |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1082 O2 = 0 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1083 O9 = 0 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1084 for z in Special_dict: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1085 if "tyr-O-9" in z: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1086 O9 = float(Special_dict[z]) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1087 if "tyr-O-2" in z: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1088 O2 = float(Special_dict[z]) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1089 if O2 > O9: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1090 highest_O = "O-2" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1091 elif ("O-3,10_wzx__1539" in O_dict) and ( |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1092 "O-9,46_wzy__1191" in O_dict |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1093 ): # and float(O310_wzx)/float(num_1) > 0.1 and float(O946_wzy)/float(num_1) > 0.1 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1094 if "O-3,10_not_in_1,3,19__1519" in O_dict: # and float(O310_no_1319)/float(num_1) > 0.1 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1095 highest_O = "O-3,10" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1096 else: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1097 highest_O = "O-1,3,19" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1098 ### end of special test for O9,46 and O3,10 family |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1099 else: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1100 try: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1101 max_score = 0 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1102 for x in O_dict: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1103 if float(O_dict[x]) >= max_score: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1104 max_score = float(O_dict[x]) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1105 #highest_O = x.split("_")[0] |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1106 # ed_SL_12182019: modified to fix the O-9,46 error example1 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1107 if (x == 'O-9,46_wbaV__1002' or x == 'O-9,46_wbaV-from-II-9,12:z29:1,5-SRR1346254__1002') and ('O-9,46_wzy__1191' not in O_dict and 'O-9,46_wzy_partial__216' not in O_dict): |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1108 highest_O = "O-9" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1109 else: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1110 highest_O = x.split("_")[0] |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1111 if highest_O == "O-1,3,19": |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1112 highest_O = '-' |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1113 max_score = 0 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1114 for x in O_dict: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1115 if x == 'O-1,3,19_not_in_3,10__130': |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1116 pass |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1117 else: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1118 if float(O_dict[x]) >= max_score: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1119 max_score = float(O_dict[x]) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1120 #highest_O = x.split("_")[0] |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1121 # ed_SL_12182019: modified to fix the O-9,46 error example1 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1122 if (x == 'O-9,46_wbaV__1002' or x == 'O-9,46_wbaV-from-II-9,12:z29:1,5-SRR1346254__1002') and ('O-9,46_wzy__1191' not in O_dict and 'O-9,46_wzy_partial__216' not in O_dict): |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1123 highest_O = "O-9" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1124 else: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1125 highest_O = x.split("_")[0] |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1126 except: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1127 pass |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1128 #call_fliC: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1129 if len(H_dict)!=0: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1130 highest_H_score_both_BC=H_dict[max(H_dict.keys(), key=(lambda k: H_dict[k]))] #used to detect whether fljB existed or not |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1131 else: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1132 highest_H_score_both_BC=0 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1133 highest_fliC = '-' |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1134 highest_fliC_raw = '-' |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1135 highest_Score = 0 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1136 log_file.write("\nH_scores:\n") |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1137 for s in H_dict: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1138 log_file.write(s+"\t"+str(H_dict[s])+"\n") |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1139 if s.startswith('fliC'): |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1140 if float(H_dict[s]) > highest_Score: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1141 highest_fliC = s.split('_')[1] |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1142 highest_fliC_raw = s |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1143 highest_Score = float(H_dict[s]) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1144 #call_fljB |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1145 highest_fljB = '-' |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1146 highest_fljB_raw = '-' |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1147 highest_Score = 0 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1148 for s in H_dict: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1149 if s.startswith('fljB'): |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1150 if float(H_dict[s]) > highest_Score and float(H_dict[s]) > highest_H_score_both_BC * 0.65: #fljB is special, so use highest_H_score_both_BC to give a general estimate of coverage, currently 0.65 seems pretty good; the reason use a high (0.65) is some fliC and fljB shared with each other |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1151 #highest_fljB = s.split('_')[1] |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1152 #highest_fljB_raw = s |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1153 #highest_Score = float(H_dict[s]) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1154 if s.split('_')[1]!=highest_fliC: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1155 highest_fljB = s.split('_')[1] |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1156 highest_fljB_raw = s |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1157 highest_Score = float(H_dict[s]) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1158 log_file.write("\nSpecial_scores:\n") |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1159 for s in Special_dict: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1160 log_file.write(s+"\t"+str(Special_dict[s])+"\n") |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1161 log_file.close() |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1162 return highest_O,highest_fliC,highest_fljB |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1163 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
def get_temp_file_names(for_fq,rev_fq):
    """Derive the temporary file names used by the "-a" (microassembly) workflow.

    Every name is built by appending a fixed suffix to one of the two
    input read-file names; nothing is created on disk here.

    Args:
        for_fq: name of the forward (or single-end) reads file.
        rev_fq: name of the reverse reads file.

    Returns:
        An 8-tuple ``(sam, bam, sorted_bam, mapped_fq1, mapped_fq2,
        combined_fq, for_sai, rev_sai)``.  Only ``mapped_fq2`` and
        ``rev_sai`` are derived from ``rev_fq``; the rest come from
        ``for_fq``.
    """
    sam = for_fq + ".sam"
    bam = for_fq + ".bam"
    sorted_bam = for_fq + "_sorted.bam"
    mapped_fq1 = for_fq + "_mapped.fq"
    mapped_fq2 = rev_fq + "_mapped.fq"
    combined_fq = for_fq + "_combined.fq"
    for_sai = for_fq + ".sai"
    rev_sai = rev_fq + ".sai"
    return sam, bam, sorted_bam, mapped_fq1, mapped_fq2, combined_fq, for_sai, rev_sai
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1175 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
def map_and_sort(threads,database,fnameA,fnameB,sam,bam,for_sai,rev_sai,sorted_bam,mapping_mode):
    """Index the database with bwa, map the reads, and name-sort the hits.

    Part of the "-a" workflow.  All work is delegated to the external
    ``bwa`` and ``samtools`` programs through the shell; most of their
    stderr is appended to ``data_log.txt`` in the current directory.

    Args:
        threads: thread count as a string (it is concatenated directly
            into the command lines).
        database: path of the FASTA file to index and map against.
        fnameA: forward (or single-end) reads file.
        fnameB: reverse reads file, or ``""`` for single-end input.
        sam: output path for the raw alignment.
        bam: output path for the alignments kept by ``samtools view -F 4``.
        for_sai: intermediate file for ``bwa aln`` on ``fnameA`` ("sam" mode).
        rev_sai: intermediate file for ``bwa aln`` on ``fnameB`` ("sam" mode).
        sorted_bam: output path for the name-sorted alignments.
        mapping_mode: ``"mem"`` (bwa mem) or ``"sam"`` (bwa aln followed by
            sampe/samse).  Any other value runs no mapper, so the later
            ``samtools view`` step has no ``sam`` file to read.

    Raises:
        subprocess.CalledProcessError: if an external command exits non-zero.
        IndexError: if the samtools banner contains no "Version:" field.
    """
    log_redirect = " 2>> data_log.txt"

    print("building database...")
    subprocess.check_call("bwa index "+database+log_redirect, shell=True)

    print("mapping...")
    if mapping_mode == "mem":
        subprocess.check_call(
            "bwa mem -k 17 -t "+threads+" "+database+" "+fnameA+" "+fnameB+" > "+sam+log_redirect,
            shell=True)
    elif mapping_mode == "sam":
        # Both layouts start by aligning the forward reads.
        subprocess.check_call(
            "bwa aln -t "+threads+" "+database+" "+fnameA+" > "+for_sai+log_redirect,
            shell=True)
        if fnameB != "":
            subprocess.check_call(
                "bwa aln -t "+threads+" "+database+" "+fnameB+" > "+rev_sai+log_redirect,
                shell=True)
            subprocess.check_call(
                "bwa sampe "+database+" "+for_sai+" "+rev_sai+" "+fnameA+" "+fnameB+" > "+sam+log_redirect,
                shell=True)
        else:
            # Fixed: this call used the undefined name `for_fq` and omitted
            # shell=True, so single-end "sam" mode could never run.
            subprocess.check_call(
                "bwa samse "+database+" "+for_sai+" "+fnameA+" > "+sam+log_redirect,
                shell=True)

    # -F 4 excludes reads flagged as unmapped; -h keeps the header.
    subprocess.check_call("samtools view -@ "+threads+" -F 4 -Sh "+sam+" > "+bam, shell=True)

    # Check the samtools version, then use the matching `sort` syntax.
    # Running samtools with no arguments prints a banner containing
    # "Version: X.Y ..." on stderr.  Decode the bytes before parsing so the
    # version token cannot pick up escaped newlines from str(bytes).
    samtools_banner = subprocess.Popen(["samtools"], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    _, err = samtools_banner.communicate()
    version = err.decode("utf-8", "replace").split("ersion:")[1].split()[0]
    print("check samtools version:", version)

    if LooseVersion(version) <= LooseVersion("1.2"):
        # Old syntax: the second positional argument is an output prefix.
        subprocess.check_call("samtools sort -@ "+threads+" -n "+bam+" "+fnameA+"_sorted", shell=True)
    else:
        subprocess.check_call("samtools sort -@ "+threads+" -n "+bam+" >"+sorted_bam, shell=True)
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1202 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
def extract_mapped_reads_and_do_assembly_and_blast(current_time,sorted_bam,combined_fq,mapped_fq1,mapped_fq2,threads,fnameA,fnameB,database,mapping_mode):
    """Extract mapped reads, assemble them with SPAdes and BLAST the database.

    Part of the "-a" workflow.  All steps run external programs
    (``bamToFastq``, ``spades.py``, ``makeblastdb``, ``blastn``) through
    the shell; most of their output is appended to ``data_log.txt``.

    Args:
        current_time: unique prefix; SPAdes writes to ``<current_time>_temp``.
        sorted_bam: alignment file to convert back to FASTQ.
        combined_fq: output FASTQ holding all mapped reads.
        mapped_fq1: output FASTQ for mates 1 (paired input only).
        mapped_fq2: output FASTQ for mates 2 (paired input only).
        threads: thread count as a string; SPAdes is capped at 4.
        fnameA: forward (or single-end) reads file name.
        fnameB: reverse reads file name, or ``""`` for single-end input.
        database: FASTA file used as the BLAST query.
        mapping_mode: mapping mode label, used only in the contig file name.

    Returns:
        ``(xmlfile, new_fasta)``.  ``xmlfile`` is ``"blasted_output.xml"``
        when assembly and BLAST ran, or ``"NA"`` when too few reads mapped.
        ``new_fasta`` is the name given to the moved ``contigs.fasta``; in
        the ``"NA"`` case that file is not created.

    Raises:
        subprocess.CalledProcessError: if an external command exits non-zero.
        OSError: if ``combined_fq`` (or ``mapped_fq1``) cannot be stat'ed.
    """
    subprocess.check_call("bamToFastq -i "+sorted_bam+" -fq "+combined_fq, shell=True)
    if fnameB != "":
        subprocess.check_call(
            "bamToFastq -i "+sorted_bam+" -fq "+mapped_fq1+" -fq2 "+mapped_fq2+" 2>> data_log.txt",
            shell=True)

    outdir = current_time+"_temp"
    # Fixed: compute the name before branching.  It used to be assigned only
    # on the assembly path, so the "NA" path failed with UnboundLocalError
    # at the return statement.
    new_fasta = fnameA+"_"+database+"_"+mapping_mode+".fasta"

    print("assembling...")
    t = "4" if int(threads) > 4 else threads  # never give SPAdes more than 4 threads

    # Files of 100 bytes or less are treated as "no reads mapped"; the
    # caller then reports "-:-:-".
    enough_reads = os.path.getsize(combined_fq) > 100 and (
        fnameB == "" or os.path.getsize(mapped_fq1) > 100)
    if not enough_reads:
        return "NA", new_fasta

    if fnameB != "":
        subprocess.check_call(
            "spades.py --careful --pe1-s "+combined_fq+" --pe1-1 "+mapped_fq1+" --pe1-2 "+mapped_fq2
            +" -t "+t+" -o "+outdir+" >> data_log.txt 2>&1",
            shell=True)
    else:
        subprocess.check_call(
            "spades.py --careful --pe1-s "+combined_fq+" -t "+t+" -o "+outdir+" >> data_log.txt 2>&1",
            shell=True)

    # Keep only the contigs; drop the rest of the SPAdes output directory.
    subprocess.check_call("mv "+outdir+"/contigs.fasta "+new_fasta+" 2> /dev/null", shell=True)
    subprocess.check_call("rm -rf "+outdir+" 2> /dev/null", shell=True)

    print("blasting...","\n")
    xmlfile = "blasted_output.xml"
    # Tool output is redirected to the log so it does not interleave with
    # this program's own output.
    subprocess.check_call(
        'makeblastdb -in '+new_fasta+' -out '+new_fasta+'_db '+'-dbtype nucl >> data_log.txt 2>&1',
        shell=True)
    # The database sequences are the query and the contigs are the subject;
    # -outfmt 5 selects XML output.
    subprocess.check_call(
        "blastn -query "+database+" -db "+new_fasta+"_db -out "+xmlfile+" -outfmt 5 >> data_log.txt 2>&1",
        shell=True)
    return xmlfile, new_fasta
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1235 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
def judge_subspecies(fnameA):
    """Predict the subspecies from the forward raw reads by running SalmID.

    Part of the "-a" workflow.  Runs ``SalmID.py -i <fnameA>`` through the
    shell, appends its stdout to ``data_log.txt`` and parses the first two
    tab-separated lines of that output: line 0 as column headers, line 1
    as scores.

    Args:
        fnameA: forward raw-reads FASTQ file passed to SalmID.

    Returns:
        The predicted subspecies label, ``"bongori"``, or ``"-"`` when the
        best subspecies score is below 60.

    Raises:
        IndexError, ValueError: if the SalmID output does not have the
            expected header/score layout.
    """
    salmid = subprocess.Popen(
        "SalmID.py -i "+fnameA, shell=True,
        stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    out, _ = salmid.communicate()
    out = out.decode("utf-8")

    # Context manager so the log handle is closed even if the write fails.
    with open("data_log.txt", "a") as log:
        log.write(out)

    report = out.split("\n")
    header = report[0].split("\t")
    scores = report[1].split("\t")

    # Columns 6 onward: one column per subspecies (header carries the label).
    ssp_labels = header[6:]
    ssp_scores = scores[6:]

    max_score = 0
    max_score_index = 1  # default is 1, means "I"
    for i, raw_score in enumerate(ssp_scores):
        score = float(raw_score)
        if max_score < score:
            max_score = score
            max_score_index = i

    # Label is the token after the first "." in the header cell.
    prediction = ssp_labels[max_score_index].split(".")[1].strip().split(" ")[0]

    # Columns 4 and 5 are compared as bongori vs. enterica (per the original
    # author's note).  Without this check the prediction would always be an
    # enterica subspecies, since those columns come later in the report.
    bongori_score = float(scores[4])
    if bongori_score > 10 and bongori_score > float(scores[5]):
        prediction = "bongori"

    # Subspecies threshold; it overrides any call made above.
    if max_score < 60:
        prediction = "-"
    return prediction
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1260 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
def judge_subspecies_Kmer(Special_dict):
    """Predict the subspecies from k-mer marker scores ("-k" workflow).

    Only keys containing ``"mer"`` whose score is above 60 are considered.
    The prediction is the text after the last ``"_"`` of the best-scoring
    such key.  A ``bongori`` marker scoring above 95 wins immediately and
    stops the scan.

    Args:
        Special_dict: mapping of marker name to score (anything ``float()``
            accepts).  Scores of keys without ``"mer"`` are never converted.

    Returns:
        The predicted subspecies label, or ``"-"`` when no marker passes the
        threshold.
    """
    max_score = 0
    prediction = "-"  # returned unchanged when nothing scores above 60
    for marker in Special_dict:
        if "mer" not in marker:
            continue
        score = float(Special_dict[marker])
        if score <= 60:  # subspecies threshold
            continue
        label = marker.split("_")[-1].strip()
        if max_score < score:
            max_score = score
            prediction = label
        # If bongori already, then no need to test enterica.
        if label == "bongori" and score > 95:
            prediction = "bongori"
            break
    return prediction
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1275 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1276 ## ed_SL_11232019: add notes for missing antigen |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
def check_antigens(ssp,O_antigen,H1_antigen,H2_antigen,NA_note):
    """Build an explanatory note for results with missing antigens.

    ``'-'`` means "not detected" for every argument.  Whenever a note is
    produced, the incoming ``NA_note`` is replaced by an empty string;
    otherwise it is returned unchanged.

    Args:
        ssp: predicted subspecies, or ``'-'``.
        O_antigen: predicted O antigen, or ``'-'``.
        H1_antigen: predicted phase 1 H antigen (fliC), or ``'-'``.
        H2_antigen: predicted phase 2 H antigen, or ``'-'``.
        NA_note: note to pass through when no antigen note applies.

    Returns:
        ``(antigen_note, NA_note)``.  ``antigen_note`` is ``''`` when O and
        H1 are both present, and also for the ``-:-:H2`` pattern, which has
        no dedicated message.
    """
    if ssp == '-':
        return ('The input genome cannot be identified as Salmonella. Check the input for taxonomic ID, contamination, or sequencing quality. ', '')

    has_O = O_antigen != '-'
    has_H1 = H1_antigen != '-'
    has_H2 = H2_antigen != '-'

    antigen_note = ''
    if has_O and not has_H1 and not has_H2:  # O:-:-
        antigen_note = 'H antigens were not detected. This is an atypical result that should be further investigated. Most Salmonella strains have at least fliC, encoding the Phase 1 H antigen, even if it is not expressed. '
    elif has_O and not has_H1 and has_H2:  # O:-:H2
        antigen_note = 'fliC was not detected. This is an atypical result that should be further investigated. Most Salmonella strains have fliC, encoding the Phase 1 H antigen, even if it is not expressed. '
    elif not has_O and has_H1:  # -:H1:X
        antigen_note = 'O antigen was not detected. This result may be due to a rough strain that has deleted the rfb region. For raw reads input, the k-mer workflow is sometimes more sensitive than the microassembly workflow in detecting O antigen. Caution should be used with this approach because the k-mer result may be due to low levels of contamination. '
    elif not has_O and not has_H1 and not has_H2:  # -:-:-
        antigen_note = 'No serotype antigens were detected. This is an atypical result that should be further investigated. '

    if antigen_note:
        # Every branch that sets a note also clears NA_note.
        NA_note = ''
    return (antigen_note, NA_note)
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1300 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1301 def main(): |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1302 #combine SeqSeroK and SeqSero2, also with SalmID |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1303 args = parse_args() |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1304 input_file = args.i |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1305 data_type = args.t |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1306 analysis_mode = args.m |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1307 mapping_mode=args.b |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1308 threads=args.p |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1309 make_dir=args.d |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1310 clean_mode=args.c |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1311 sample_name=args.n |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1312 ingore_header=args.s |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1313 k_size=27 #will change for bug fixing |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1314 dirpath = os.path.abspath(os.path.dirname(os.path.realpath(__file__))) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1315 ex_dir = os.path.abspath(os.path.join(os.path.dirname(os.path.dirname(__file__)),'seqsero2_db')) # ed_SL_09152019: add ex_dir for packaging |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1316 seqsero2_db=ex_dir+"/H_and_O_and_specific_genes.fasta" # ed_SL_11092019: change path to database for packaging |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1317 database="H_and_O_and_specific_genes.fasta" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1318 note="Note: " |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1319 NA_note="This predicted serotype is not in the Kauffman-White scheme. " # ed_SL_09272019: add for new output format |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1320 if len(sys.argv)==1: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1321 subprocess.check_call(dirpath+"/SeqSero2_package.py -h",shell=True)#change name of python file |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1322 else: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1323 request_id = time.strftime("%m_%d_%Y_%H_%M_%S", time.localtime()) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1324 request_id += str(random.randint(1, 10000000)) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1325 if make_dir is None: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1326 make_dir="SeqSero_result_"+request_id |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1327 make_dir=os.path.abspath(make_dir) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1328 if os.path.isdir(make_dir): |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1329 pass |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1330 else: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1331 subprocess.check_call("mkdir -p "+make_dir,shell=True) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1332 #subprocess.check_call("cp "+dirpath+"/"+database+" "+" ".join(input_file)+" "+make_dir,shell=True) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1333 #subprocess.check_call("ln -sr "+dirpath+"/"+database+" "+" ".join(input_file)+" "+make_dir,shell=True) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1334 subprocess.check_call("ln -f -s "+seqsero2_db+" "+" ".join(input_file)+" "+make_dir,shell=True) # ed_SL_11092019: change path to database for packaging |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1335 #subprocess.check_call("ln -f -s "+dirpath+"/"+database+" "+" ".join(input_file)+" "+make_dir,shell=True) ### use -f option to force the replacement of links, remove -r and use absolute path instead to avoid link issue (use 'type=os.path.abspath' in -i argument). |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1336 ############################begin the real analysis |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1337 if analysis_mode=="a": |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1338 if data_type in ["1","2","3"]:#use allele mode |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1339 for_fq,rev_fq=get_input_files(make_dir,input_file,data_type,dirpath) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1340 os.chdir(make_dir) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1341 ###add a function to tell input files |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1342 fnameA=for_fq.split("/")[-1] |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1343 fnameB=rev_fq.split("/")[-1] |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1344 current_time=time.strftime("%Y_%m_%d_%H_%M_%S", time.localtime()) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1345 sam,bam,sorted_bam,mapped_fq1,mapped_fq2,combined_fq,for_sai,rev_sai=get_temp_file_names(fnameA,fnameB) #get temp files id |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1346 map_and_sort(threads,database,fnameA,fnameB,sam,bam,for_sai,rev_sai,sorted_bam,mapping_mode) #do mapping and sort |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1347 ### avoid error out when micro assembly fails. ed_SL_03172020 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1348 try: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1349 xmlfile,new_fasta=extract_mapped_reads_and_do_assembly_and_blast(current_time,sorted_bam,combined_fq,mapped_fq1,mapped_fq2,threads,fnameA,fnameB,database,mapping_mode) #extract the mapped reads and do micro assembly and blast |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1350 except (UnboundLocalError, subprocess.CalledProcessError): |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1351 xmlfile="NA" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1352 H1_cont_stat_list=[] |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1353 H2_cont_stat_list=[] |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1354 ### |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1355 if xmlfile=="NA": |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1356 O_choice,fliC_choice,fljB_choice,special_gene_list,contamination_O,contamination_H=("-","-","-",[],"","") |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1357 else: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1358 Final_list=xml_parse_score_comparision_seqsero(xmlfile) #analyze xml and get parsed results |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1359 file=open("data_log.txt","a") |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1360 for x in Final_list: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1361 file.write("\t".join(str(y) for y in x)+"\n") |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1362 file.close() |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1363 Final_list_passed=[x for x in Final_list if float(x[0].split("_cov_")[1].split("_")[0])>=0.9 and (x[1]>=int(x[0].split("__")[1]) or x[1]>=int(x[0].split("___")[1].split("_")[3]) or x[1]>1000)] |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1364 O_choice,fliC_choice,fljB_choice,special_gene_list,contamination_O,contamination_H,Otypes_uniq,H1_cont_stat_list,H2_cont_stat_list=predict_O_and_H_types(Final_list,Final_list_passed,new_fasta) #predict O, fliC and fljB |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1365 subspecies=judge_subspecies(fnameA) #predict subspecies |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1366 ###output |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1367 predict_form,predict_sero,star,star_line,claim=seqsero_from_formula_to_serotypes(O_choice,fliC_choice,fljB_choice,special_gene_list,subspecies) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1368 claim="" #04132019, disable claim for new report requirement |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1369 contamination_report="" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1370 H_list=["fliC_"+x for x in H1_cont_stat_list if len(x)>0]+["fljB_"+x for x in H2_cont_stat_list if len(x)>0] |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1371 if contamination_O!="" and contamination_H=="": |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1372 contamination_report="#Potential inter-serotype contamination detected from O antigen signals. All O-antigens detected:"+"\t".join(Otypes_uniq)+"." |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1373 elif contamination_O=="" and contamination_H!="": |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1374 contamination_report="#Potential inter-serotype contamination detected or potential thrid H phase from H antigen signals. All H-antigens detected:"+"\t".join(H_list)+"." |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1375 elif contamination_O!="" and contamination_H!="": |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1376 contamination_report="#Potential inter-serotype contamination detected from both O and H antigen signals.All O-antigens detected:"+"\t".join(Otypes_uniq)+". All H-antigens detected:"+"\t".join(H_list)+"." |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1377 if contamination_report!="": |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1378 #contamination_report="potential inter-serotype contamination detected (please refer below antigen signal report for details)." #above contamination_reports are for back-up and bug fixing #web-based mode need to be re-used, 04132019 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1379 contamination_report="Co-existence of multiple serotypes detected, indicating potential inter-serotype contamination. See 'Extracted_antigen_alleles.fasta' for detected serotype determinant alleles. " |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1380 #claim="\n"+open("Extracted_antigen_alleles.fasta","r").read()#used to store H and O antigen sequences #04132019, need to change if using web-version |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1381 #if contamination_report+star_line+claim=="": #0413, new output style |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1382 # note="" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1383 #else: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1384 # note="Note:" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1385 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1386 ### ed_SL_11232019: add notes for missing antigen |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1387 if O_choice=="": |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1388 O_choice="-" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1389 antigen_note,NA_note=check_antigens(subspecies,O_choice,fliC_choice,fljB_choice,NA_note) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1390 if sample_name: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1391 print ("Sample name:\t"+sample_name) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1392 ### |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1393 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1394 if clean_mode: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1395 subprocess.check_call("rm -rf ../"+make_dir,shell=True) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1396 make_dir="none-output-directory due to '-c' flag" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1397 else: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1398 new_file=open("SeqSero_result.txt","w") |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1399 ### ed_SL_01152020: add new output |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1400 conta_note="yes" if "inter-serotype contamination" in contamination_report else "no" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1401 tsv_file=open("SeqSero_result.tsv","w") |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1402 if ingore_header: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1403 pass |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1404 else: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1405 tsv_file.write("Sample name\tOutput directory\tInput files\tO antigen prediction\tH1 antigen prediction(fliC)\tH2 antigen prediction(fljB)\tPredicted subspecies\tPredicted antigenic profile\tPredicted serotype\tPotential inter-serotype contamination\tNote\n") |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1406 if sample_name: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1407 new_file.write("Sample name:\t"+sample_name+"\n") |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1408 tsv_file.write(sample_name+'\t') |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1409 else: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1410 tsv_file.write(input_file[0].split('/')[-1]+'\t') |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1411 ### |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1412 if "N/A" not in predict_sero: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1413 new_file.write("Output directory:\t"+make_dir+"\n"+ |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1414 "Input files:\t"+"\t".join(input_file)+"\n"+ |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1415 "O antigen prediction:\t"+O_choice+"\n"+ |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1416 "H1 antigen prediction(fliC):\t"+fliC_choice+"\n"+ |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1417 "H2 antigen prediction(fljB):\t"+fljB_choice+"\n"+ |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1418 "Predicted subspecies:\t"+subspecies+"\n"+ |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1419 "Predicted antigenic profile:\t"+predict_form+"\n"+ |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1420 "Predicted serotype:\t"+predict_sero+"\n"+ |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1421 note+contamination_report+star_line+claim+antigen_note+"\n")#+## |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1422 tsv_file.write(make_dir+"\t"+" ".join(input_file)+"\t"+O_choice+"\t"+fliC_choice+"\t"+fljB_choice+"\t"+subspecies+"\t"+predict_form+"\t"+predict_sero+"\t"+conta_note+"\t"+contamination_report+star_line+claim+antigen_note+"\n") |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1423 else: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1424 #star_line=star_line.strip()+"\tNone such antigenic formula in KW.\n" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1425 star_line="" #04132019, for new output requirement, disable star_line if "NA" in output |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1426 new_file.write("Output directory:\t"+make_dir+"\n"+ |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1427 "Input files:\t"+"\t".join(input_file)+"\n"+ |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1428 "O antigen prediction:\t"+O_choice+"\n"+ |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1429 "H1 antigen prediction(fliC):\t"+fliC_choice+"\n"+ |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1430 "H2 antigen prediction(fljB):\t"+fljB_choice+"\n"+ |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1431 "Predicted subspecies:\t"+subspecies+"\n"+ |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1432 "Predicted antigenic profile:\t"+predict_form+"\n"+ |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1433 "Predicted serotype:\t"+subspecies+' '+predict_form+"\n"+ # add serotype output for "N/A" prediction, add subspecies |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1434 note+NA_note+contamination_report+star_line+claim+antigen_note+"\n")#+## |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1435 tsv_file.write(make_dir+"\t"+" ".join(input_file)+"\t"+O_choice+"\t"+fliC_choice+"\t"+fljB_choice+"\t"+subspecies+"\t"+predict_form+"\t"+subspecies+' '+predict_form+"\t"+conta_note+"\t"+NA_note+contamination_report+star_line+claim+antigen_note+"\n") |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1436 new_file.close() |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1437 tsv_file.close() |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1438 #subprocess.check_call("cat Seqsero_result.txt",shell=True) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1439 #subprocess.call("rm H_and_O_and_specific_genes.fasta* *.sra *.bam *.sam *.fastq *.gz *.fq temp.txt *.xml "+fnameA+"*_db* 2> /dev/null",shell=True) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1440 subprocess.call("rm H_and_O_and_specific_genes.fasta* *.sra *.bam *.sam *.fastq *.gz *.fq temp.txt "+fnameA+"*_db* 2> /dev/null",shell=True) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1441 if "N/A" not in predict_sero: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1442 #print("Output_directory:"+make_dir+"\nInput files:\t"+for_fq+" "+rev_fq+"\n"+"O antigen prediction:\t"+O_choice+"\n"+"H1 antigen prediction(fliC):\t"+fliC_choice+"\n"+"H2 antigen prediction(fljB):\t"+fljB_choice+"\n"+"Predicted antigenic profile:\t"+predict_form+"\n"+"Predicted subspecies:\t"+subspecies+"\n"+"Predicted serotype(s):\t"+predict_sero+star+"\nNote:"+contamination_report+star+star_line+claim+"\n")#+## |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1443 print("Output directory:\t"+make_dir+"\n"+ |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1444 "Input files:\t"+"\t".join(input_file)+"\n"+ |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1445 "O antigen prediction:\t"+O_choice+"\n"+ |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1446 "H1 antigen prediction(fliC):\t"+fliC_choice+"\n"+ |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1447 "H2 antigen prediction(fljB):\t"+fljB_choice+"\n"+ |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1448 "Predicted subspecies:\t"+subspecies+"\n"+ |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1449 "Predicted antigenic profile:\t"+predict_form+"\n"+ |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1450 "Predicted serotype:\t"+predict_sero+"\n"+ |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1451 note+contamination_report+star_line+claim+antigen_note+"\n")#+## |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1452 else: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1453 print("Output directory:\t"+make_dir+"\n"+ |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1454 "Input files:\t"+"\t".join(input_file)+"\n"+ |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1455 "O antigen prediction:\t"+O_choice+"\n"+ |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1456 "H1 antigen prediction(fliC):\t"+fliC_choice+"\n"+ |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1457 "H2 antigen prediction(fljB):\t"+fljB_choice+"\n"+ |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1458 "Predicted subspecies:\t"+subspecies+"\n"+ |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1459 "Predicted antigenic profile:\t"+predict_form+"\n"+ |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1460 "Predicted serotype:\t"+subspecies+' '+predict_form+"\n"+ # add serotype output for "N/A" prediction, subspecies |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1461 note+NA_note+contamination_report+star_line+claim+antigen_note+"\n") |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1462 else: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1463 print("Allele modes only support raw reads datatype, i.e. '-t 1 or 2 or 3'; please use '-m k'") |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1464 elif analysis_mode=="k": |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1465 #ex_dir = os.path.dirname(os.path.realpath(__file__)) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1466 ex_dir = os.path.abspath(os.path.join(os.path.dirname(os.path.dirname(__file__)),'seqsero2_db')) # ed_SL_09152019: change ex_dir for packaging |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1467 #output_mode = args.mode |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1468 for_fq,rev_fq=get_input_files(make_dir,input_file,data_type,dirpath) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1469 input_file = for_fq #-k will just use forward because not all reads were used |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1470 os.chdir(make_dir) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1471 f = open(ex_dir + '/antigens.pickle', 'rb') |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1472 lib_dict = pickle.load(f) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1473 f.close |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1474 input_Ks=get_input_K(input_file,lib_dict,data_type,k_size) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1475 O_dict,H_dict,Special_dict=get_kmer_dict(lib_dict,input_Ks) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1476 highest_O,highest_fliC,highest_fljB=call_O_and_H_type(O_dict,H_dict,Special_dict,make_dir) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1477 subspecies=judge_subspecies_Kmer(Special_dict) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1478 if subspecies=="IIb" or subspecies=="IIa": |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1479 subspecies="II" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1480 predict_form,predict_sero,star,star_line,claim = seqsero_from_formula_to_serotypes( |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1481 highest_O.split('-')[1], highest_fliC, highest_fljB, Special_dict,subspecies) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1482 claim="" #no claim any more based on new output requirement |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1483 #if star_line+claim=="": #0413, new output style |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1484 # note="" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1485 #else: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1486 # note="Note:" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1487 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1488 ### ed_SL_11232019: add notes for missing antigen |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1489 if highest_O.split('-')[-1]=="": |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1490 O_choice="-" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1491 else: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1492 O_choice=highest_O.split('-')[-1] |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1493 antigen_note,NA_note=check_antigens(subspecies,O_choice,highest_fliC,highest_fljB,NA_note) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1494 if sample_name: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1495 print ("Sample name:\t"+sample_name) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1496 ### |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1497 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1498 if clean_mode: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1499 subprocess.check_call("rm -rf ../"+make_dir,shell=True) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1500 make_dir="none-output-directory due to '-c' flag" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1501 # ### ed_SL_05282019, fix the assignment issue of variable 'O_choice' using "-m k -c" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1502 # if highest_O.split('-')[-1]=="": |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1503 # O_choice="-" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1504 # else: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1505 # O_choice=highest_O.split('-')[-1] |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1506 # ### |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1507 else: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1508 # if highest_O.split('-')[-1]=="": |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1509 # O_choice="-" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1510 # else: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1511 # O_choice=highest_O.split('-')[-1] |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1512 #print("Output_directory:"+make_dir+"\tInput_file:"+input_file+"\tPredicted subpecies:"+subspecies + '\tPredicted antigenic profile:' + predict_form + '\tPredicted serotype(s):' + predict_sero) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1513 new_file=open("SeqSero_result.txt","w") |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1514 #new_file.write("Output_directory:"+make_dir+"\nInput files:\t"+input_file+"\n"+"O antigen prediction:\t"+O_choice+"\n"+"H1 antigen prediction(fliC):\t"+highest_fliC+"\n"+"H2 antigen prediction(fljB):\t"+highest_fljB+"\n"+"Predicted antigenic profile:\t"+predict_form+"\n"+"Predicted subspecies:\t"+subspecies+"\n"+"Predicted serotype(s):\t"+predict_sero+star+"\n"+star+star_line+claim+"\n")#+## |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1515 ### ed_SL_01152020: add new output |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1516 tsv_file=open("SeqSero_result.tsv","w") |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1517 if ingore_header: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1518 pass |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1519 else: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1520 tsv_file.write("Sample name\tOutput directory\tInput files\tO antigen prediction\tH1 antigen prediction(fliC)\tH2 antigen prediction(fljB)\tPredicted subspecies\tPredicted antigenic profile\tPredicted serotype\tNote\n") |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1521 if sample_name: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1522 new_file.write("Sample name:\t"+sample_name+"\n") |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1523 tsv_file.write(sample_name+'\t') |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1524 else: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1525 tsv_file.write(input_file.split('/')[-1]+'\t') |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1526 ### |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1527 if "N/A" not in predict_sero: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1528 new_file.write("Output directory:\t"+make_dir+"\n"+ |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1529 "Input files:\t"+input_file+"\n"+ |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1530 "O antigen prediction:\t"+O_choice+"\n"+ |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1531 "H1 antigen prediction(fliC):\t"+highest_fliC+"\n"+ |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1532 "H2 antigen prediction(fljB):\t"+highest_fljB+"\n"+ |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1533 "Predicted subspecies:\t"+subspecies+"\n"+ |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1534 "Predicted antigenic profile:\t"+predict_form+"\n"+ |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1535 "Predicted serotype:\t"+predict_sero+"\n"+ |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1536 note+star_line+claim+antigen_note+"\n")#+## |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1537 tsv_file.write(make_dir+"\t"+input_file+"\t"+O_choice+"\t"+highest_fliC+"\t"+highest_fljB+"\t"+subspecies+"\t"+predict_form+"\t"+predict_sero+"\t"+star_line+claim+antigen_note+"\n") |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1538 else: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1539 #star_line=star_line.strip()+"\tNone such antigenic formula in KW.\n" |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1540 star_line = "" #changed for new output requirement, 04132019 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1541 new_file.write("Output directory:\t"+make_dir+"\n"+ |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1542 "Input files:\t"+input_file+"\n"+ |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1543 "O antigen prediction:\t"+O_choice+"\n"+ |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1544 "H1 antigen prediction(fliC):\t"+highest_fliC+"\n"+ |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1545 "H2 antigen prediction(fljB):\t"+highest_fljB+"\n"+ |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1546 "Predicted subspecies:\t"+subspecies+"\n"+ |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1547 "Predicted antigenic profile:\t"+predict_form+"\n"+ |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1548 "Predicted serotype:\t"+subspecies+' '+predict_form+"\n"+ # add serotype output for "N/A" prediction, subspecies |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1549 note+NA_note+star_line+claim+antigen_note+"\n")#+## |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1550 tsv_file.write(make_dir+"\t"+input_file+"\t"+O_choice+"\t"+highest_fliC+"\t"+highest_fljB+"\t"+subspecies+"\t"+predict_form+"\t"+subspecies+' '+predict_form+"\t"+NA_note+star_line+claim+antigen_note+"\n") |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1551 new_file.close() |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1552 tsv_file.close() |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1553 subprocess.call("rm *.fasta* *.fastq *.gz *.fq temp.txt *.sra 2> /dev/null",shell=True) |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1554 if "N/A" not in predict_sero: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1555 print("Output directory:\t"+make_dir+"\n"+ |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1556 "Input files:\t"+input_file+"\n"+ |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1557 "O antigen prediction:\t"+O_choice+"\n"+ |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1558 "H1 antigen prediction(fliC):\t"+highest_fliC+"\n"+ |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1559 "H2 antigen prediction(fljB):\t"+highest_fljB+"\n"+ |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1560 "Predicted subspecies:\t"+subspecies+"\n"+ |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1561 "Predicted antigenic profile:\t"+predict_form+"\n"+ |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1562 "Predicted serotype:\t"+predict_sero+"\n"+ |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1563 note+star_line+claim+antigen_note+"\n")#+## |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1564 else: |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1565 print("Output directory:\t"+make_dir+"\n"+ |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1566 "Input files:\t"+input_file+"\n"+ |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1567 "O antigen prediction:\t"+O_choice+"\n"+ |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1568 "H1 antigen prediction(fliC):\t"+highest_fliC+"\n"+ |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1569 "H2 antigen prediction(fljB):\t"+highest_fljB+"\n"+ |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1570 "Predicted subspecies:\t"+subspecies+"\n"+ |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1571 "Predicted antigenic profile:\t"+predict_form+"\n"+ |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1572 "Predicted serotype:\t"+subspecies+' '+predict_form+"\n"+ # add serotype output for "N/A" prediction, subspecies |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1573 note+NA_note+star_line+claim+antigen_note+"\n")#+## |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1574 |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
# Script entry point: call main() only when this file is executed directly, not when it is imported as a module.
1575 if __name__ == '__main__': |
e6437d423693
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
cstrittmatter
parents:
diff
changeset
|
1576 main() |