Mercurial > repos > cstrittmatter > ss2v110
annotate bin/SeqSero2_package.py @ 12:08832c0d3cbd draft
planemo upload commit 70dc513aa7d7ac6785847dfd86323687613b6b68-dirty
author | cstrittmatter |
---|---|
date | Fri, 15 May 2020 10:19:08 -0400 |
parents | e6437d423693 |
children |
rev | line source |
---|---|
0
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1 #!/usr/bin/env python3 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
2 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
3 import sys |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
4 import time |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
5 import random |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
6 import os |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
7 import subprocess |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
8 import gzip |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
9 import io |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
10 import pickle |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
11 import argparse |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
12 import itertools |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
13 from distutils.version import LooseVersion |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
14 from distutils.spawn import find_executable |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
15 sys.path.insert(1,sys.path[0]+'/..') |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
16 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
17 try: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
18 from .version import SeqSero2_version |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
19 except Exception: #ImportError |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
20 from version import SeqSero2_version |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
21 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
22 ### SeqSero Kmer |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
def parse_args(argv=None):
    """Parse the input arguments, use '-h' for help.

    Args:
        argv: Optional list of argument strings. When ``None`` (the default)
            argparse falls back to ``sys.argv[1:]``, which is exactly what the
            original zero-argument call did.

    Returns:
        The ``argparse.Namespace`` produced by the parser, with attributes
        ``i``, ``t``, ``b``, ``p``, ``m``, ``n``, ``d``, ``c``, ``s`` and
        ``check``.
    """
    parser = argparse.ArgumentParser(
        usage='SeqSero2_package.py -t <data_type> -m <mode> -i <input_data> [-d <output_directory>] [-p <number of threads>] [-b <BWA_algorithm>]\n\nDevelopper: Shaokang Zhang (zskzsk@uga.edu), Hendrik C Den-Bakker (Hendrik.DenBakker@uga.edu) and Xiangyu Deng (xdeng@uga.edu)\n\nContact email:seqsero@gmail.com\n\nVersion: v1.1.1'
    )  # add "-m <data_type>" in future
    # 'type=os.path.abspath' turns every input path into an absolute path.
    parser.add_argument("-i", nargs="+", help="<string>: path/to/input_data", type=os.path.abspath)
    parser.add_argument("-t", choices=['1', '2', '3', '4', '5', '6'], help="<int>: '1' for interleaved paired-end reads, '2' for separated paired-end reads, '3' for single reads, '4' for genome assembly, '5' for nanopore fasta, '6' for nanopore fastq")
    parser.add_argument("-b", choices=['sam', 'mem'], default="mem", help="<string>: algorithms for bwa mapping for allele mode; 'mem' for mem, 'sam' for samse/sampe; default=mem; optional; for now we only optimized for default 'mem' mode")
    # NOTE: the thread count is deliberately left as a string (default "1"),
    # as in the original; no type conversion is applied here.
    parser.add_argument("-p", default="1", help="<int>: number of threads for allele mode, if p >4, only 4 threads will be used for assembly since the amount of extracted reads is small, default=1")
    parser.add_argument("-m", choices=['k', 'a'], default="a", help="<string>: which workflow to apply, 'a'(raw reads allele micro-assembly), 'k'(raw reads and genome assembly k-mer), default=a")
    parser.add_argument("-n", help="<string>: optional, to specify a sample name in the report output")
    parser.add_argument("-d", help="<string>: optional, to specify an output directory name, if not set, the output directory would be 'SeqSero_result_'+time stamp+one random number")
    parser.add_argument("-c", action="store_true", help="<flag>: if '-c' was flagged, SeqSero2 will only output serotype prediction without the directory containing log files")
    parser.add_argument("-s", action="store_true", help="<flag>: if '-s' was flagged, SeqSero2 will not output header in SeqSero_result.tsv")
    parser.add_argument("--check", action="store_true", help="<flag>: use '--check' flag to check the required dependencies")
    parser.add_argument('-v', '--version', action='version', version='%(prog)s ' + SeqSero2_version)
    return parser.parse_args(argv)
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
38 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
### check paths of dependencies
# This runs at module level, so the command line is parsed as soon as the
# module is executed or imported.
check_dependencies = parse_args().check
# Executables that are looked up on PATH when '--check' is given.
dependencies = ['bwa','samtools','blastn','fastq-dump','spades.py','bedtools','SalmID.py']
if check_dependencies:
    for item in dependencies:
        # find_executable returns the resolved path, or None when not found.
        ext_path = find_executable(item)
        if ext_path is not None:
            print ("Using "+item+" - "+ext_path)
        else:
            print ("ERROR: can not find "+item+" in PATH")
    # NOTE(review): indentation was lost in the annotate listing this was
    # recovered from; sys.exit() is assumed to sit after the loop (report
    # every dependency, then stop) -- confirm against the repository copy.
    # It is called without a status, so the exit code is 0 even when a
    # dependency is missing.
    sys.exit()
### end of --check
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
51 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
# Complement of every supported (upper-case) IUPAC nucleotide code.
# Built once at import time instead of on every call: reverse_complement()
# is invoked once per k-mer, so rebuilding the dict each time was pure overhead.
_COMPLEMENT = {
    'A': 'T',
    'C': 'G',
    'G': 'C',
    'T': 'A',
    'N': 'N',
    'M': 'K',
    'R': 'Y',
    'W': 'W',
    'S': 'S',
    'Y': 'R',
    'K': 'M',
    'V': 'B',
    'H': 'D',
    'D': 'H',
    'B': 'V',
}


def reverse_complement(sequence):
    """Return the reverse complement of an upper-case nucleotide string.

    Args:
        sequence: String made only of the symbols A, C, G, T, N, M, R, W, S,
            Y, K, V, H, D and B.

    Returns:
        A new string with the symbols in reverse order, each replaced by its
        complement. An empty input gives an empty string.

    Raises:
        KeyError: If ``sequence`` contains any other symbol (including
            lower-case letters), exactly as before.
    """
    return "".join(_COMPLEMENT[base] for base in reversed(sequence))
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
71 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
72 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
def createKmerDict_reads(list_of_strings, kmer):
    """Count every k-mer, and its reverse complement, found in the strings.

    Args:
        list_of_strings: Iterable of sequence strings; newline characters at
            either end of each string are removed before counting.
        kmer: Length of the k-mers to extract.

    Returns:
        A dict mapping each upper-cased k-mer (and each reverse complement)
        to the number of times it was counted. A k-mer that equals its own
        reverse complement is counted twice per occurrence, as it always was.
        Strings shorter than ``kmer`` contribute nothing.

    Raises:
        KeyError: Propagated from ``reverse_complement`` when a k-mer holds a
            symbol it does not know.
    """
    kmer_table = {}
    for string in list_of_strings:
        sequence = string.strip('\n')
        for i in range(len(sequence) - kmer + 1):
            # Upper-case once; the original re-applied .upper() on every
            # dictionary access.
            new_mer = sequence[i:i + kmer].upper()
            new_mer_rc = reverse_complement(new_mer)
            kmer_table[new_mer] = kmer_table.get(new_mer, 0) + 1
            kmer_table[new_mer_rc] = kmer_table.get(new_mer_rc, 0) + 1
    return kmer_table
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
89 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
90 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
def multifasta_dict(multifasta):
    """Read a (multi-)FASTA file into a ``{header: sequence}`` dict.

    The file is read in a single pass and closed afterwards. The original
    left the handle open and used ``list.index`` to find each header, which
    was quadratic and always resolved a repeated header to its first
    occurrence (doubling the first record and dropping the later one).

    Args:
        multifasta: Path of the FASTA file.

    Returns:
        A dict keyed by the header line without its leading '>'. Each value
        is the concatenation of the stripped sequence lines that follow the
        header. Blank lines are ignored, lines before the first header are
        ignored, and headers with no sequence lines get no entry. When the
        same header occurs more than once, the sequences of all its records
        are concatenated in file order.
    """
    parts = {}
    current = None  # header of the record being read, without '>'
    with open(multifasta, 'r') as handle:
        for raw_line in handle:
            line = raw_line.strip()
            if not line:
                continue
            if line[0] == '>':
                current = line[1:]
            elif current is not None:
                # Collect pieces and join once at the end; repeated string
                # concatenation is quadratic for long multi-line records.
                parts.setdefault(current, []).append(line)
    return {header: ''.join(pieces) for header, pieces in parts.items()}
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
108 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
109 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
def multifasta_single_string(multifasta):
    """Concatenate all sequence lines of a FASTA file into one string.

    Args:
        multifasta: Path of the FASTA file.

    Returns:
        Every non-blank line that does not start with '>', stripped of
        surrounding whitespace and joined in file order with no separator.
        Record boundaries are therefore not preserved.
    """
    # The context manager closes the file; the original never closed it.
    with open(multifasta, 'r') as handle:
        stripped_lines = (line.strip() for line in handle)
        return ''.join(
            text for text in stripped_lines if text and text[0] != '>'
        )
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
116 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
117 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
def chunk_a_long_sequence(long_sequence, chunk_size=60):
    """Split a sequence into consecutive pieces of ``chunk_size`` symbols.

    The number of full chunks is now derived from ``chunk_size``; the
    original divided by a hard-coded 60, so any other chunk size produced
    wrong pieces. Results for the default ``chunk_size=60`` are unchanged.

    Args:
        long_sequence: The string (or other sliceable sequence) to split.
        chunk_size: Length of each full chunk; must be at least 1.

    Returns:
        A list holding every full chunk followed by the remainder. The
        remainder is always appended, so the list is never empty and its
        last element is empty when the length is an exact multiple of
        ``chunk_size`` (this matches the original behaviour).

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive integer")
    steps = len(long_sequence) // chunk_size  # how many full chunks
    chunk_list = [
        long_sequence[i * chunk_size:(i + 1) * chunk_size]
        for i in range(steps)
    ]
    chunk_list.append(long_sequence[steps * chunk_size:])
    return chunk_list
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
125 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
126 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
def target_multifasta_kmerizer(multifasta, k, kmerDict):
    """Collect k-mers from the regions of an assembly that hit ``kmerDict``.

    The FASTA file is flattened into one string and cut into 60-symbol
    chunks. For every chunk whose middle ``k`` symbols are a key of
    ``kmerDict``, a window of neighbouring chunks is joined and all of its
    k-mers (plus reverse complements) are collected. Chunks that fall inside
    the most recently extracted window are not tested again.

    Args:
        multifasta: Path of the FASTA file.
        k: k-mer length.
        kmerDict: Container supporting ``in``; holds the target k-mers.

    Returns:
        A set of k-mer strings; empty when the file has no sequence (the
        original raised ZeroDivisionError in that case).
    """
    forward_length = 300  # if find the target, put forward 300 bases
    # NOTE(review): reverse_length is never used. reverse_lines below is
    # derived from forward_length, which looks like a copy-paste slip.
    # It is left unchanged on purpose: using 2200 would widen every
    # extracted window and change the resulting k-mer set -- confirm the
    # intended value with the authors before touching it.
    reverse_length = 2200  # if find the target, put backward 2200 bases
    chunk_size = 60  # size of the pieces the single long sequence is cut into
    target_mers = set()  # de-duplicate while collecting
    long_single_string = multifasta_single_string(multifasta)
    multifasta_list = chunk_a_long_sequence(long_single_string, chunk_size)
    if not multifasta_list or not multifasta_list[0]:
        # No sequence at all: nothing to scan.
        return target_mers
    unit_length = len(multifasta_list[0])
    forward_lines = int(forward_length / unit_length) + 1
    reverse_lines = int(forward_length / unit_length) + 1
    last_index = len(multifasta_list) - 1
    start_num = 0
    end_num = 0
    for i, line in enumerate(multifasta_list):
        if start_num <= i < end_num:  # avoid computational repetition
            continue
        start = (len(line) - k) // 2
        s1 = line[start:k + start]
        if s1 not in kmerDict:  # only the middle part of the chunk is probed
            continue
        start_num = max(i - forward_lines, 0)
        end_num = min(i + reverse_lines, last_index)
        # The slice excludes end_num itself, as in the original.
        target_line = "".join(
            x.strip() for x in multifasta_list[start_num:end_num]
        )
        target_mers.update(createKmerDict_reads([str(target_line)], k))
    return target_mers
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
163 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
164 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
def target_read_kmerizer(file, k, kmerDict):
    """Collect k-mers from the FASTQ reads whose middle k-mer hits ``kmerDict``.

    Every fourth line starting at line 2 is treated as a sequence line. A
    read is kept when the ``k`` symbols around its middle are a key of
    ``kmerDict``; all k-mers of a kept read (plus reverse complements) are
    collected. Scanning stops once the kept lines add up to at least
    4,000,000 characters.

    Args:
        file: Path of the FASTQ file; a name ending in ".gz" is read through
            gzip, and its sequence lines are decoded with the default codec.
        k: k-mer length.
        kmerDict: Container supporting ``in``; holds the target k-mers.

    Returns:
        A set of k-mer strings (empty when no read matched).
    """
    is_gzipped = file.endswith(".gz")
    total_coverage = 0
    target_mers = set()  # de-duplicate while collecting
    handle = gzip.open(file, "rb") if is_gzipped else open(file, "r")
    # Stream the file and always close it; the original leaked the handle and
    # loaded uncompressed files completely with readlines().
    with handle:
        for line_number, line in enumerate(handle, start=1):
            if line_number % 4 != 2:
                continue
            if is_gzipped:
                line = line.decode()
            # The length still includes the line terminator, as before.
            start = (len(line) - k) // 2
            s1 = line[start:k + start]
            if s1 in kmerDict:  # potential target read (middle part only)
                total_coverage += len(line)
                target_mers.update(createKmerDict_reads([str(line)], k))
                if total_coverage >= 4000000:
                    break
    return target_mers
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
192 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
193 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
def minion_fasta_kmerizer(file, k, kmerDict):
    """Collect the k-mers of a FASTA file that hit ``kmerDict``.

    Every even-numbered line is treated as a sequence line, so each record
    is expected to occupy exactly two lines (header, sequence). For each
    (k-mer, reverse-complement) pair produced by ``kmers``, both members are
    collected when either of them is a key of ``kmerDict``.

    Args:
        file: Path of the FASTA file.
        k: k-mer length passed on to ``kmers``.
        kmerDict: Container supporting ``in``; holds the target k-mers.

    Returns:
        A set of k-mer strings (empty when nothing matched).
    """
    # NOTE(review): `kmers` is defined elsewhere in this file; from its use
    # here it presumably yields (kmer, reverse_complement) pairs -- confirm.
    target_mers = set()  # only membership is returned, so no counts are kept
    with open(file) as handle:  # closes the file; the original leaked it
        for line_number, line in enumerate(handle, start=1):
            if line_number % 2 != 0:
                continue
            for kmer, rc_kmer in kmers(line.strip().upper(), k):
                if (kmer in kmerDict) or (rc_kmer in kmerDict):
                    target_mers.add(kmer)
                    target_mers.add(rc_kmer)
    return target_mers
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
213 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
214 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
def minion_fastq_kmerizer(file, k, kmerDict):
    """Collect the k-mers of a FASTQ file that hit a reference k-mer set.

    Only lines 2, 6, 10, ... (the second line of each group of four) are
    read as sequence, so every record must occupy exactly four lines.

    Args:
        file: path of the FASTQ file to read.
        k: k-mer length, passed through to ``kmers``.
        kmerDict: container of reference k-mers; only membership tests
            (``in``) are performed on it.

    Returns:
        A set containing, for every k-mer whose forward or
        reverse-complement form is in ``kmerDict``, both of those forms.
    """
    target_mers = set()
    # ``with`` guarantees the handle is closed; a bare ``open`` in the loop
    # header would leave that to the garbage collector.
    with open(file) as handle:
        for line_number, line in enumerate(handle, start=1):
            if line_number % 4 != 2:
                continue  # not the sequence line of a four-line record
            for kmer, rc_kmer in kmers(line.strip().upper(), k):
                if kmer in kmerDict or rc_kmer in kmerDict:
                    # Occurrence counts are never consumed, so a set of the
                    # matching k-mers is all that has to be kept.
                    target_mers.add(kmer)
                    target_mers.add(rc_kmer)
    return target_mers
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
234 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
235 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
def multifasta_single_string2(multifasta):
    """Concatenate all sequence lines of a (multi-)FASTA file.

    Header lines (first non-blank character ``>``) and blank lines are
    skipped; every other line is stripped of surrounding whitespace and
    appended in file order.

    Args:
        multifasta: path of the FASTA file to read.

    Returns:
        One string holding the concatenated sequence lines, or ``''`` when
        the file contains no sequence line.
    """
    pieces = []
    with open(multifasta, 'r') as f:
        for line in f:
            stripped = line.strip()
            # Guard against blank lines: indexing ``stripped[0]`` on an
            # empty string raises IndexError.
            if not stripped or stripped.startswith('>'):
                continue
            pieces.append(stripped)
    # A single join avoids repeated string concatenation on large genomes.
    return ''.join(pieces)
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
245 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
246 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
def kmers(seq, k):
    """Yield every k-mer of ``seq`` together with its reverse complement.

    Args:
        seq: sequence string to split into k-mers.
        k: k-mer length.

    Yields:
        ``(kmer, rc_kmer)`` tuples for each start position
        ``0 .. len(seq) - k``; nothing is yielded when ``seq`` is shorter
        than ``k``.
    """
    # NOTE(review): the slicing below presumes reverse_complement() returns a
    # string of the same length as ``seq`` -- confirm against its definition.
    rev_comp = reverse_complement(seq)
    for start in range(len(seq) - k + 1):
        # The partner of seq[start:start + k] is the window of rev_comp that
        # ends ``start`` characters before its end. ``-0`` would produce an
        # empty slice, so the start == 0 window is closed with None. The
        # previous loop began at 1 to dodge that and thereby dropped the
        # first k-mer of every sequence.
        yield seq[start:start + k], rev_comp[-(start + k):-start or None]
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
251 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
252 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
def multifasta_to_kmers_dict(multifasta, k_size):
    """Build the database k-mer sets for a multi-FASTA file.

    Args:
        multifasta: passed unchanged to ``multifasta_dict``.
        k_size: k-mer length handed to ``createKmerDict_reads``.

    Returns:
        A dict with one entry per key of ``multifasta_dict(multifasta)``,
        whose value is the set of items produced by iterating
        ``createKmerDict_reads`` for that entry's single sequence.
    """
    sequences = multifasta_dict(multifasta)
    return {
        header: set(createKmerDict_reads([sequences[header]], k_size))
        for header in sequences
    }
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
260 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
261 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
def Combine(b, c):
    """List every variant of an antigen formula with optional factors.

    Example: the formula ``f,[g],t`` (``b=['g']``, ``c=['f', 't']``) gives
    ``['f,t', 'f,g,t']``.

    Args:
        b: sequence of optional factors (written in brackets in the formula).
        c: list of factors that are always present.

    Returns:
        A list of comma-joined strings. The first is ``c`` joined as given;
        it is followed by one entry per non-empty combination of ``b``
        (smallest combinations first), each with its factors sorted.
    """
    variants = [",".join(c)]
    for size in range(1, len(b) + 1):
        for chosen in itertools.combinations(b, size):
            # Join then split so that factors which themselves contain
            # commas are flattened before sorting.
            factors = ",".join(list(c) + [",".join(chosen)]).split(",")
            factors.sort()
            variants.append(",".join(factors))
    return variants
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
280 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
281 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
282 def seqsero_from_formula_to_serotypes(Otype, fliC, fljB, special_gene_list,subspecies): |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
283 #like test_output_06012017.txt |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
284 #can add more varialbles like sdf-type, sub-species-type in future (we can conclude it into a special-gene-list) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
285 from Initial_Conditions import phase1,phase2,phaseO,sero,subs,remove_list,rename_dict |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
286 rename_dict_not_anymore=[rename_dict[x] for x in rename_dict] |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
287 rename_dict_all=rename_dict_not_anymore+list(rename_dict) #used for decide whether to |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
288 seronames = [] |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
289 seronames_none_subspecies=[] |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
290 for i in range(len(phase1)): |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
291 fliC_combine = [] |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
292 fljB_combine = [] |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
293 if phaseO[i] == Otype: # no VII in KW, but it's there |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
294 ### for fliC, detect every possible combinations to avoid the effect of "[" |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
295 if phase1[i].count("[") == 0: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
296 fliC_combine.append(phase1[i]) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
297 elif phase1[i].count("[") >= 1: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
298 c = [] |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
299 b = [] |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
300 if phase1[i][0] == "[" and phase1[i][-1] == "]" and phase1[i].count( |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
301 "[") == 1: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
302 content = phase1[i].replace("[", "").replace("]", "") |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
303 fliC_combine.append(content) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
304 fliC_combine.append("-") |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
305 else: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
306 for x in phase1[i].split(","): |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
307 if "[" in x: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
308 b.append(x.replace("[", "").replace("]", "")) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
309 else: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
310 c.append(x) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
311 fliC_combine = Combine( |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
312 b, c |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
313 ) #Combine will offer every possible combinations of the formula, like f,[g],t: f,t f,g,t |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
314 ### end of fliC "[" detect |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
315 ### for fljB, detect every possible combinations to avoid the effect of "[" |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
316 if phase2[i].count("[") == 0: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
317 fljB_combine.append(phase2[i]) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
318 elif phase2[i].count("[") >= 1: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
319 d = [] |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
320 e = [] |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
321 if phase2[i][0] == "[" and phase2[i][-1] == "]" and phase2[i].count( |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
322 "[") == 1: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
323 content = phase2[i].replace("[", "").replace("]", "") |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
324 fljB_combine.append(content) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
325 fljB_combine.append("-") |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
326 else: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
327 for x in phase2[i].split(","): |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
328 if "[" in x: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
329 d.append(x.replace("[", "").replace("]", "")) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
330 else: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
331 e.append(x) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
332 fljB_combine = Combine(d, e) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
333 ### end of fljB "[" detect |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
334 new_fliC = fliC.split( |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
335 "," |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
336 ) #because some antigen like r,[i] not follow alphabetical order, so use this one to judge and can avoid missings |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
337 new_fliC.sort() |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
338 new_fliC = ",".join(new_fliC) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
339 new_fljB = fljB.split(",") |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
340 new_fljB.sort() |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
341 new_fljB = ",".join(new_fljB) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
342 if (new_fliC in fliC_combine |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
343 or fliC in fliC_combine) and (new_fljB in fljB_combine |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
344 or fljB in fljB_combine): |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
345 ######start, remove_list,rename_dict, added on 11/11/2018 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
346 if sero[i] not in remove_list: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
347 temp_sero=sero[i] |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
348 if temp_sero in rename_dict: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
349 temp_sero=rename_dict[temp_sero] #rename if in the rename list |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
350 if temp_sero not in seronames:#the new sero may already included, if yes, then not consider |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
351 if subs[i] == subspecies: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
352 seronames.append(temp_sero) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
353 seronames_none_subspecies.append(temp_sero) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
354 else: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
355 pass |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
356 else: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
357 pass |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
358 ######end, added on 11/11/2018 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
359 #analyze seronames |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
360 subspecies_pointer="" |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
361 if len(seronames) == 0 and len(seronames_none_subspecies)!=0: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
362 # ed_SL_12182019: modified to fix the subspecies output problem |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
363 #seronames=seronames_none_subspecies |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
364 seronames=["N/A"] |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
365 #subspecies_pointer="1" |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
366 subspecies_pointer="0" |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
367 if len(seronames) == 0: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
368 seronames = [ |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
369 "N/A (The predicted antigenic profile does not exist in the White-Kauffmann-Le Minor scheme)" |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
370 ] |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
371 star = "" |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
372 star_line = "" |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
373 if len(seronames) > 1: #there are two possible predictions for serotypes |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
374 star = "*" |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
375 #changed 04072019 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
376 #star_line = "The predicted serotypes share the same general formula:\t" + Otype + ":" + fliC + ":" + fljB + "\n" |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
377 if subspecies_pointer=="1" and len(seronames_none_subspecies)!=0: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
378 star="*" |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
379 star_line="The predicted O and H antigens correspond to serotype '"+(" or ").join(seronames)+"' in the Kauffmann-White scheme. The predicted subspecies by SalmID (github.com/hcdenbakker/SalmID) may not be consistent with subspecies designation in the Kauffmann-White scheme. " + star_line |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
380 #star_line="The formula with this subspieces prediction can't get a serotype in KW manual, and the serotyping prediction was made without considering it."+star_line |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
381 if Otype=="": |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
382 Otype="-" |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
383 predict_form = Otype + ":" + fliC + ":" + fljB |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
384 predict_sero = (" or ").join(seronames) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
385 ###special test for Enteritidis |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
386 if predict_form == "9:g,m:-": |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
387 sdf = "-" |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
388 for x in special_gene_list: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
389 if x.startswith("sdf"): |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
390 sdf = "+" |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
391 #star_line="Detected sdf gene, a marker to differentiate Gallinarum and Enteritidis" |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
392 star_line="sdf gene detected. " |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
393 #predict_form = predict_form + " Sdf prediction:" + sdf |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
394 predict_form = predict_form #changed 04072019 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
395 if sdf == "-": |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
396 star = "*" |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
397 #star_line="Didn't detected sdf gene, a marker to differentiate Gallinarum and Enteritidis" |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
398 star_line="sdf gene not detected. " |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
399 #changed in 04072019, for new output |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
400 #star_line = "Additional characterization is necessary to assign a serotype to this strain. Commonly circulating strains of serotype Enteritidis are sdf+, although sdf- strains of serotype Enteritidis are known to exist. Serotype Gallinarum is typically sdf- but should be quite rare. Sdf- strains of serotype Enteritidis and serotype Gallinarum can be differentiated by phenotypic profile or genetic criteria.\n" |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
401 #predict_sero = "Gallinarum/Enteritidis" #04132019, for new output requirement |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
402 predict_sero = "Gallinarum or Enteritidis" |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
403 ###end of special test for Enteritidis |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
404 elif predict_form == "4:i:-": |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
405 predict_sero = "I 4,[5],12:i:-" # change serotype name |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
406 elif predict_form == "4:r:-": |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
407 predict_sero = "N/A (4:r:-)" |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
408 elif predict_form == "4:b:-": |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
409 predict_sero = "N/A (4:b:-)" |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
410 #elif predict_form == "8:e,h:1,2": #removed after official merge of newport and bardo |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
411 #predict_sero = "Newport" |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
412 #star = "*" |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
413 #star_line = "Serotype Bardo shares the same antigenic profile with Newport, but Bardo is exceedingly rare." |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
414 claim = "The serotype(s) is/are the only serotype(s) with the indicated antigenic profile currently recognized in the Kauffmann White Scheme. New serotypes can emerge and the possibility exists that this antigenic profile may emerge in a different subspecies. Identification of strains to the subspecies level should accompany serotype determination; the same antigenic profile in different subspecies is considered different serotypes.\n" |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
415 if "N/A" in predict_sero: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
416 claim = "" |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
417 #special test for Typhimurium |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
418 if "Typhimurium" in predict_sero or predict_form == "4:i:-": |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
419 normal = 0 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
420 mutation = 0 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
421 for x in special_gene_list: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
422 if "oafA-O-4_full" in x: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
423 normal = float(special_gene_list[x]) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
424 elif "oafA-O-4_5-" in x: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
425 mutation = float(special_gene_list[x]) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
426 if normal > mutation: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
427 pass |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
428 elif normal < mutation: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
429 #predict_sero = predict_sero.strip() + "(O5-)" |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
430 predict_sero = predict_sero.strip() #diable special sero for new output requirement, 04132019 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
431 star = "*" |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
432 #star_line = "Detected the deletion of O5-." |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
433 star_line = "Detected a deletion that causes O5- variant of Typhimurium. " |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
434 else: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
435 pass |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
436 #special test for Paratyphi B |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
437 if "Paratyphi B" in predict_sero or predict_form == "4:b:-": |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
438 normal = 0 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
439 mutation = 0 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
440 for x in special_gene_list: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
441 if "gntR-family-regulatory-protein_dt-positive" in x: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
442 normal = float(special_gene_list[x]) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
443 elif "gntR-family-regulatory-protein_dt-negative" in x: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
444 mutation = float(special_gene_list[x]) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
445 #print(normal,mutation) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
446 if normal > mutation: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
447 #predict_sero = predict_sero.strip() + "(dt+)" #diable special sero for new output requirement, 04132019 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
448 predict_sero = predict_sero.strip()+' var. L(+) tartrate+' if "Paratyphi B" in predict_sero else predict_sero.strip() |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
449 star = "*" |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
450 #star_line = "Didn't detect the SNP for dt- which means this isolate is a Paratyphi B variant L(+) tartrate(+)." |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
451 star_line = "The SNP that causes d-Tartrate nonfermentating phenotype of Paratyphi B was not detected. " |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
452 elif normal < mutation: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
453 #predict_sero = predict_sero.strip() + "(dt-)" #diable special sero for new output requirement, 04132019 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
454 predict_sero = predict_sero.strip() |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
455 star = "*" |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
456 #star_line = "Detected the SNP for dt- which means this isolate is a systemic pathovar of Paratyphi B." |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
457 star_line = "Detected the SNP for d-Tartrate nonfermenting phenotype of Paratyphi B. " |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
458 else: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
459 star = "*" |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
460 #star_line = " Failed to detect the SNP for dt-, can't decide it's a Paratyphi B variant L(+) tartrate(+) or not." |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
461 star_line = " " ## ed_SL_05152019: do not report this situation. |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
462 #special test for O13,22 and O13,23 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
463 if Otype=="13": |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
464 #ex_dir = os.path.dirname(os.path.realpath(__file__)) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
465 ex_dir = os.path.abspath(os.path.join(os.path.dirname(os.path.dirname(__file__)),'seqsero2_db')) # ed_SL_09152019 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
466 f = open(ex_dir + '/special.pickle', 'rb') |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
467 special = pickle.load(f) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
468 O22_O23=special['O22_O23'] |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
469 if predict_sero.split(" or ")[0] in O22_O23[-1] and predict_sero.split(" or ")[0] not in rename_dict_all:#if in rename_dict_all, then it means already merged, no need to analyze |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
470 O22_score=0 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
471 O23_score=0 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
472 for x in special_gene_list: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
473 if "O:22" in x: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
474 O22_score = O22_score+float(special_gene_list[x]) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
475 elif "O:23" in x: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
476 O23_score = O23_score+float(special_gene_list[x]) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
477 #print(O22_score,O23_score) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
478 for z in O22_O23[0]: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
479 if predict_sero.split(" or ")[0] in z: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
480 if O22_score > O23_score: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
481 star = "*" |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
482 #star_line = "Detected O22 specific genes to further differenciate '"+predict_sero+"'." #diabled for new output requirement, 04132019 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
483 predict_sero = z[0] |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
484 elif O22_score < O23_score: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
485 star = "*" |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
486 #star_line = "Detected O23 specific genes to further differenciate '"+predict_sero+"'." #diabled for new output requirement, 04132019 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
487 predict_sero = z[1] |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
488 else: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
489 star = "*" |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
490 #star_line = "Fail to detect O22 and O23 differences." #diabled for new output requirement, 04132019 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
491 if " or " in predict_sero: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
492 star_line = star_line + "The predicted serotypes share the same general formula: " + Otype + ":" + fliC + ":" + fljB + "." |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
493 #special test for O6,8 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
494 #merge_O68_list=["Blockley","Bovismorbificans","Hadar","Litchfield","Manhattan","Muenchen"] #remove 11/11/2018, because already in merge list |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
495 #for x in merge_O68_list: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
496 # if x in predict_sero: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
497 # predict_sero=x |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
498 # star="" |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
499 # star_line="" |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
500 #special test for Montevideo; most of them are monophasic |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
501 #if "Montevideo" in predict_sero and "1,2,7" in predict_form: #remove 11/11/2018, because already in merge list |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
502 #star="*" |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
503 #star_line="Montevideo is almost always monophasic, having an antigen called for the fljB position may be a result of Salmonella-Salmonella contamination." |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
504 return predict_form, predict_sero, star, star_line, claim |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
505 ### End of SeqSero Kmer part |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
506 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
507 ### Begin of SeqSero2 allele prediction and output |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
def xml_parse_score_comparision_seqsero(xmlfile):
    """Parse a BLAST XML report and rank every query/hit pair by bit score.

    For each alignment of each record, the HSPs are accumulated into one
    score and one identity fraction. An HSP whose query range overlaps the
    query positions already covered by earlier HSPs of the same alignment is
    down-weighted by its non-overlapping fraction, so repeated coverage
    cannot push the identity fraction above 1.

    Args:
        xmlfile: path of the BLAST XML output to read.

    Returns:
        A list of ``(name, score, ids, cover_region)`` tuples sorted by
        ``score`` in descending order, where ``name`` is
        ``"<query>___<hit_def>"``, ``score`` is the weighted sum of HSP bit
        scores, ``ids`` is the weighted sum of ``identities / query_length``
        and ``cover_region`` is the set of query positions covered.
    """
    from Bio.Blast import NCBIXML

    # Materialise all records inside the context manager so the file is
    # closed as soon as parsing is done (it used to stay open).
    with open(xmlfile) as xml_handle:
        print(xml_handle)  # debug trace kept so the tool's stdout is unchanged
        records = list(NCBIXML.parse(xml_handle))

    names = []
    scores = []
    identities = []
    query_regions = []
    for record in records:
        query_name = record.query.strip()
        for alignment in record.alignments:
            score = 0
            ids = 0
            cover_region = set()  # query positions already counted for this hit
            for hsp in alignment.hsps:
                span = set(range(hsp.query_start, hsp.query_end))
                overlap = cover_region & span
                # A non-empty overlap implies a non-empty span, so the
                # division is safe; a zero-width span simply counts in full
                # instead of raising ZeroDivisionError.
                if overlap:
                    fraction = 1 - len(overlap) / float(len(span))
                else:
                    fraction = 1
                cover_region |= span
                score += hsp.bits * fraction
                ids += float(hsp.identities) / record.query_length * fraction
            names.append(query_name + "___" + alignment.hit_def)
            scores.append(score)
            identities.append(ids)
            query_regions.append(cover_region)

    ranked = zip(names, scores, identities, query_regions)
    return sorted(ranked, key=lambda d: d[1], reverse=True)
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
547 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
548 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
def Uniq(L, sort_on_fre="none"):
    """Return the unique values of ``L`` and how often each one occurs.

    Args:
        L: list of mutually comparable items. It is sorted IN PLACE, exactly
            as before, so the caller's list is left sorted after the call.
        sort_on_fre: when left at ``"none"`` the unique values come back in
            ascending value order; any other value orders them by ascending
            count (ties broken by value).

    Returns:
        A ``(values, counts)`` pair of equally long lists, where
        ``counts[i]`` is the number of occurrences of ``values[i]`` in ``L``.
    """
    L.sort()  # kept in place: existing callers may rely on this side effect
    values = []
    count = []
    # After sorting, equal items are adjacent, so one grouping pass replaces
    # the previous quadratic membership test and counting loops.
    for value, run in itertools.groupby(L):
        values.append(value)
        count.append(sum(1 for _ in run))
    if sort_on_fre != "none" and values:
        # zip() is a lazy iterator on Python 3 and cannot be indexed, so the
        # sorted pairs are unpacked explicitly; the emptiness guard avoids
        # indexing into the result of unzipping nothing.
        ranked = sorted(zip(count, values))
        count = [pair[0] for pair in ranked]
        values = [pair[1] for pair in ranked]
    return (values, count)
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
565 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
def judge_fliC_or_fljB_from_head_tail_for_one_contig(nodes_vs_score_list):
    """Decide whether one contig looks like fliC or fljB from its head/tail hits.

    Each entry of ``nodes_vs_score_list`` is indexed as ``entry[0]`` (a name
    string) and ``entry[1]`` (a numeric score). Scores of entries whose name
    contains ``"fliC"`` are summed into one total; scores of the remaining
    entries whose name contains ``"fljB"`` are summed into the other.

    Returns:
        ``(role, difference)`` where ``role`` is ``"fliC"`` when the fliC
        total is greater than or equal to the fljB total and ``"fljB"``
        otherwise, and ``difference`` is the absolute gap between the two
        totals. A small gap means the call is unreliable and the caller is
        expected to fall back to whole-contig BLAST scores.
    """
    fliC_total = sum(
        entry[1] for entry in nodes_vs_score_list if "fliC" in entry[0]
    )
    # A name mentioning both genes is credited to fliC only.
    fljB_total = sum(
        entry[1]
        for entry in nodes_vs_score_list
        if "fliC" not in entry[0] and "fljB" in entry[0]
    )
    winner = "fljB" if fliC_total < fljB_total else "fliC"
    return (winner, abs(fliC_total - fljB_total))
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
582 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
def judge_fliC_or_fljB_from_whole_contig_blast_score_ranking(node_name, Final_list, Final_list_passed):
    """Pick fliC or fljB for a contig from the whole-contig BLAST hit list.

    Used when the head/tail score difference is too small to trust, or when
    no head/tail hit exists for the contig.

    Args:
        node_name: contig identifier, matched as a substring of each hit name.
        Final_list: not used by this function; kept for the call signature.
        Final_list_passed: sequence of hits whose first element is the hit
            name. Presumably ordered best-first, so the first match wins;
            confirm against the caller.

    Returns:
        The text before the first ``"_"`` of the first hit name containing
        ``node_name``, or ``""`` when no hit name contains it.
    """
    # NOTE(review): substring matching means a short node name can also match
    # a longer one that starts with it; verify the naming scheme rules this out.
    matching_names = (
        hit[0] for hit in Final_list_passed if node_name in hit[0]
    )
    first_match = next(matching_names, None)
    if first_match is None:
        return ""
    return first_match.split("_")[0]
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
592 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
def fliC_or_fljB_judge_from_head_tail_sequence(nodes_list,tail_head_list,Final_list,Final_list_passed):
    """Label each candidate H-antigen contig with a phase (first field of a hit label).

    Parameters:
        nodes_list: contig names to label (the unique list produced by
            ``Uniq(nodes)`` in ``decide_contig_roles_for_H_antigen``).
        tail_head_list: hits whose label (``hit[0]``) contains "first" or
            "last"; a hit belongs to a contig when the contig name is a
            substring of the label.
        Final_list, Final_list_passed: hit lists forwarded unchanged to
            ``judge_fliC_or_fljB_from_whole_contig_blast_score_ranking``;
            ``Final_list_passed`` is also scanned directly as a fallback.

    Returns:
        A list of ``(role, contig_name)`` tuples, one per entry of
        ``nodes_list`` and in the same order. ``role`` is "" when nothing
        matched the contig.

    Notes:
        The original code had four identical branches for 1, 2, 3 and 4
        flank hits (the 2-hit branch additionally computed a value that was
        never used); they are merged here without changing the result.
    """
    role_list = []
    for node in nodes_list:
        role = ""
        flank_hits = [hit for hit in tail_head_list if node in hit[0]]
        if 1 <= len(flank_hits) <= 4:
            role, diff = judge_fliC_or_fljB_from_head_tail_for_one_contig(flank_hits)
            # A difference below 20 is treated as an unreliable head/tail call
            # and is overridden by the whole-contig score ranking.
            # NOTE(review): the unit of `diff` is defined by the helper - confirm there.
            if diff < 20:
                role = judge_fliC_or_fljB_from_whole_contig_blast_score_ranking(node, Final_list, Final_list_passed)
        else:
            # No flank hit at all (or, unexpectedly, more than four): take the
            # phase prefix of the first passed hit that mentions this contig.
            for hit in Final_list_passed:
                if node in hit[0]:
                    role = hit[0].split("_")[0]
                    break
        role_list.append((role, node))

    # Most common error: two contigs assigned to the same phase. In that case
    # redo every assignment using the whole-contig score ranking only.
    if len(role_list) == 2 and role_list[0][0] == role_list[1][0]:
        role_list = [
            (judge_fliC_or_fljB_from_whole_contig_blast_score_ranking(node, Final_list, Final_list_passed), node)
            for node in nodes_list
        ]
    return role_list
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
649 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
def decide_contig_roles_for_H_antigen(Final_list,Final_list_passed):
    """Decide which contig is fliC and which one is fljB.

    Collects the contig names (text after "___" in the hit label) of every
    passed hit whose label starts with "fl" and is not a "first"/"last"
    flank hit, de-duplicates them with ``Uniq``, gathers the flank hits from
    ``Final_list`` and hands everything to
    ``fliC_or_fljB_judge_from_head_tail_sequence``.

    Returns whatever that function returns (a list of (role, contig) pairs).
    """
    flagellin_contigs = []
    for hit in Final_list_passed:
        label = hit[0]
        if label.startswith("fl") and not ("last" in label or "first" in label):
            flagellin_contigs.append(label.split("___")[1].strip())
    # Uniq returns two values; only the first (the unique contig names) is used.
    node_list, _counts = Uniq(flagellin_contigs)
    flank_hits = [hit for hit in Final_list if "last" in hit[0] or "first" in hit[0]]
    return fliC_or_fljB_judge_from_head_tail_sequence(node_list, flank_hits, Final_list, Final_list_passed)
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
662 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
def decide_O_type_and_get_special_genes(Final_list,Final_list_passed):
    """Choose the O-antigen call from the hit lists and collect special genes.

    Hit labels (``hit[0]``) are parsed as
    ``<gene>__<gene_length>___<contig>``, where the contig name carries
    ``length_<n>`` and ``_cov_<x>`` fields; ``hit[2]`` is the score used for
    ranking (described in the original comments as coverage * identity).
    NOTE(review): the label layout is inferred from the split() calls below.

    Parameters:
        Final_list: all hits; only used to look for a second O gene on a
            contig that is much longer than its best O gene.
        Final_list_passed: hits that passed filtering; source of O hits
            (label starts with "O-") and of special genes (label starts
            with neither "O-" nor "fl").

    Returns:
        ``(O_choice, O_nodes_list, special_genes, final_O, contamination_O,
        Otypes_uniq)``. ``O_choice`` is "-" when there is no usable O hit
        and stays "?" when no candidate clears the coverage cut-off.

    Changes from the original:
        * When every O hit is the short ``O-1,3,19_not_in_3,10`` marker the
          result is "-" for any number of hits; previously two or more such
          hits made ``max()`` fail on an empty list.
        * An empty coverage-filtered list no longer raises IndexError in the
          O-9,46 / O-3,10 family test; the test simply does not match.
        * The best-effort ``except`` no longer traps KeyboardInterrupt or
          SystemExit.
    """
    marker = "O-1,3,19_not_in_3,10"

    def _gene_length(label):
        # Integer between "__" and "___" in the label.
        return int(label.split("__")[1].split("___")[0])

    def _contig_coverage(label):
        # Float following the last "_cov_" in the label.
        return float(label.split("_cov_")[-1].split("_")[0])

    O_choice = "?"
    O_list = []
    special_genes = {}
    nodes = []
    for hit in Final_list_passed:
        if hit[0].startswith("O-"):
            nodes.append(hit[0].split("___")[1].strip())
        elif not hit[0].startswith("fl"):
            special_genes[hit[0]] = hit[2]  # 08172018, x[2] changed from x[-1]
    contig_names, _counts = Uniq(nodes)

    # Keep the first passed O hit of every contig.
    final_O = []
    O_nodes_list = []
    for contig in contig_names:
        for hit in Final_list_passed:
            if contig in hit[0] and hit[0].startswith("O-"):
                final_O.append(hit)
                break

    ### O contig has the problem of two genes on same contig, so do additional test
    potenial_new_gene = ""
    for hit in final_O:
        merged_contig = None
        # The marker gene is too short compared with the others and is not
        # considered. Otherwise: gene length << contig length, with 850 as
        # the allowance for flanking regions.
        if marker not in hit[0] and _gene_length(hit[0]) * hit[2] + 850 <= int(hit[0].split("length_")[1].split("_")[0]):
            merged_contig = hit[0].split("___")[1].strip()
            print(merged_contig)
        if merged_contig is not None:
            for other in Final_list:
                # Relatively strict filter; a pass means a second gene sits on the contig.
                if merged_contig in other[0] and other not in final_O and (other[1] >= _gene_length(other[0]) * 1.5 or (other[1] >= _gene_length(other[0]) * other[2] and other[1] >= 400)):
                    potenial_new_gene = other
                    break
    if potenial_new_gene != "":
        print("two differnt genes in same contig, fix it for O antigen")
        print(potenial_new_gene[:3])
        new_contig = potenial_new_gene[0].split("___")[-1]
        if any(hit[0].split("___")[-1] == new_contig for hit in final_O):
            final_O.append(potenial_new_gene)
    ### end of the two genes on same contig test

    final_O = sorted(final_O, key=lambda hit: hit[2], reverse=True)

    if all(marker in hit[0] for hit in final_O):
        # No O hit, or nothing but the marker gene: no O type can be called.
        O_choice = "-"
    else:
        highest_O_coverage = max(_contig_coverage(hit[0]) for hit in final_O if marker not in hit[0])
        O_list = []
        O_list_less_contamination = []
        for hit in final_O:
            # The 130 bp marker is too small and may affect further analysis;
            # 0.15 of the highest coverage is the contamination cut-off.
            if "O-1,3,19_not_in_3,10__130" not in hit[0]:
                O_list.append(hit[0].split("__")[0])
                O_nodes_list.append(hit[0].split("___")[1])
                if _contig_coverage(hit[0]) > highest_O_coverage * 0.15:
                    O_list_less_contamination.append(hit[0].split("__")[0])
        # Best gene that cleared the cut-off; "" when none did, so the
        # startswith() tests below are simply False.
        top_clean = O_list_less_contamination[0] if O_list_less_contamination else ""

        ### special test for O9,46 and O3,10 family
        has_wbaV = "O-9,46_wbaV" in O_list or "O-9,46_wbaV-from-II-9,12:z29:1,5-SRR1346254" in O_list
        if has_wbaV and top_clean.startswith("O-9,"):
            if "O-9,46_wzy" in O_list or "O-9,46_wzy_partial" in O_list:
                O_choice = "O-9,46"
            elif "O-9,46,27_partial_wzy" in O_list:
                O_choice = "O-9,46,27"
            else:
                O_choice = "O-9"  # next, detect O9 vs O2
                O2 = 0
                O9 = 0
                for gene in special_genes:
                    if "tyr-O-9" in gene:
                        O9 = special_genes[gene]
                    elif "tyr-O-2" in gene:
                        O2 = special_genes[gene]
                # On a tie or when O9 wins, the call stays O-9.
                if O2 > O9:
                    O_choice = "O-2"
        elif ("O-3,10_wzx" in O_list) and ("O-9,46_wzy" in O_list) and (O_list[0].startswith("O-3,10") or top_clean.startswith("O-9,46_wzy")):
            if "O-3,10_not_in_1,3,19" in O_list:
                O_choice = "O-3,10"
            else:
                O_choice = "O-1,3,19"
        ### end of special test for O9,46 and O3,10 family
        else:
            try:
                max_score = 0
                for hit in final_O:
                    # hit[2] is the ranking score; the hit must also meet the coverage threshold.
                    if hit[2] >= max_score and _contig_coverage(hit[0]) > highest_O_coverage * 0.15:
                        max_score = hit[2]
                        O_choice = hit[0].split("_")[0]
                if O_choice == "O-1,3,19":
                    O_choice = final_O[1][0].split("_")[0]
            except Exception:
                # Deliberate best effort: keep whatever O_choice was reached
                # (e.g. when there is no second hit to fall back to).
                pass

    # Special for the very low chance situation between O4 and O9,27,46
    # (serotypes like Bredeney and Schwarzengrund): normally O-4 scores
    # higher, but sequencing quality may affect the prediction.
    if O_choice == "O-9,46,27" and len(O_list) == 2 and "O-4_wzx" in O_list:
        O_choice = "O-4"

    Otypes = []
    for name in O_list:
        if name != marker:
            if "O-9,46_" not in name:
                Otypes.append(name.split("_")[0])
            else:
                # e.g. O-9,46_wbaV-from-II-9,12:z29:1,5-SRR1346254 -> O-9,46_wbaV
                Otypes.append(name.split("-from")[0])
    Otypes_uniq, Otypes_fre = Uniq(Otypes)

    contamination_O = ""
    if O_choice in ("O-9,46,27", "O-3,10", "O-1,3,19"):
        if len(Otypes_uniq) > 2:
            contamination_O = "potential contamination from O antigen signals"
    elif len(Otypes_uniq) > 1:
        # Special 4,12,27 case such as Bredeney and Schwarzengrund.
        o4_with_o9_46_27 = O_choice == "O-4" and len(Otypes_uniq) == 2 and "O-9,46,27" in Otypes_uniq
        # O-9,46 showing exactly its wbaV and wzy genes.
        o9_46_gene_pair = O_choice == "O-9,46" and len(Otypes_uniq) == 2 and "O-9,46_wbaV" in Otypes_uniq and "O-9,46_wzy" in Otypes_uniq
        if not (o4_with_o9_46_27 or o9_46_gene_pair):
            contamination_O = "potential contamination from O antigen signals"
    return O_choice,O_nodes_list,special_genes,final_O,contamination_O,Otypes_uniq
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
793 ### End of SeqSero2 allele prediction and output |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
794 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
def get_input_files(make_dir,input_file,data_type,dirpath):
    """Work out the forward/reverse read file names for the given data type.

    The function temporarily changes into ``make_dir`` (where the input
    files are expected to be present under their base names), converts the
    input when needed (``.sra`` via ``fastq-dump``, interleaved FASTQ via
    ``deinterleave_fastq.sh``) and then returns to the directory it was
    called from.

    Args:
        make_dir: working directory that holds the input files.
        input_file: list of input paths; only the base name (text after the
            last "/") of each entry is used.
        data_type: one of the strings
            '1' (pair-end reads, interleaved),
            '2' (pair-end reads, separated),
            '3' (single-end reads),
            '4' (assembly),
            '5' (nanopore fasta),
            '6' (nanopore fastq).
            Any other value yields two empty strings.
        dirpath: directory that contains ``deinterleave_fastq.sh``.

    Returns:
        Tuple ``(for_fq, rev_fq)`` of file names relative to ``make_dir``;
        ``rev_fq`` is "" when there is no reverse-read file.

    Raises:
        subprocess.CalledProcessError: if an external conversion command
            exits with a non-zero status.
        IndexError: if ``input_file`` has fewer entries than ``data_type``
            requires.
    """
    # Function-scope import: shlex is only needed here, for shell quoting.
    import shlex

    for_fq = ""
    rev_fq = ""
    # Remember where we came from so the caller's working directory is
    # restored exactly, even when make_dir is nested/absolute or a step fails
    # (a plain chdir("..") only works for a single-level relative make_dir).
    start_dir = os.getcwd()
    os.chdir(make_dir)
    try:
        if data_type == "1":
            reads = input_file[0].split("/")[-1]
            if reads.endswith(".sra"):
                subprocess.check_call("fastq-dump --split-files " + shlex.quote(reads), shell=True)
                for_fq = reads.replace(".sra", "_1.fastq")
                rev_fq = reads.replace(".sra", "_2.fastq")
            else:
                core_id = reads.split(".fastq")[0].split(".fq")[0]
                for_fq = core_id + "_1.fastq"
                rev_fq = core_id + "_2.fastq"
                # Stream the (possibly gzipped) interleaved reads into the
                # de-interleaving script; every path is quoted so names with
                # spaces or shell metacharacters are passed through verbatim.
                reader = "gzip -dc " if reads.endswith(".gz") else "cat "
                script = shlex.quote(dirpath + "/deinterleave_fastq.sh")
                subprocess.check_call(
                    reader + shlex.quote(reads) + " | " + script + " "
                    + shlex.quote(for_fq) + " " + shlex.quote(rev_fq),
                    shell=True,
                )
        elif data_type == "2":
            for_fq = input_file[0].split("/")[-1]
            rev_fq = input_file[1].split("/")[-1]
        elif data_type == "3":
            reads = input_file[0].split("/")[-1]
            if reads.endswith(".sra"):
                subprocess.check_call("fastq-dump --split-files " + shlex.quote(reads), shell=True)
                for_fq = reads.replace(".sra", "_1.fastq")
            else:
                for_fq = reads
        elif data_type in ["4", "5", "6"]:
            for_fq = input_file[0].split("/")[-1]
    finally:
        os.chdir(start_dir)
    return for_fq, rev_fq
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
829 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
830 def predict_O_and_H_types(Final_list,Final_list_passed,new_fasta): |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
831 #get O and H types from Final_list from blast parsing; allele mode |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
832 from Bio import SeqIO |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
833 fliC_choice="-" |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
834 fljB_choice="-" |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
835 fliC_contig="NA" |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
836 fljB_contig="NA" |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
837 fliC_region=set([0]) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
838 fljB_region=set([0,]) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
839 fliC_length=0 #can be changed to coverage in future; in 03292019, changed to ailgned length |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
840 fljB_length=0 #can be changed to coverage in future; in 03292019, changed to ailgned length |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
841 O_choice="-"#no need to decide O contig for now, should be only one |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
842 O_choice,O_nodes,special_gene_list,O_nodes_roles,contamination_O,Otypes_uniq=decide_O_type_and_get_special_genes(Final_list,Final_list_passed)#decide the O antigen type and also return special-gene-list for further identification |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
843 O_choice=O_choice.split("-")[-1].strip() |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
844 if (O_choice=="1,3,19" and len(O_nodes_roles)==1 and "1,3,19" in O_nodes_roles[0][0]) or O_choice=="": |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
845 O_choice="-" |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
846 H_contig_roles=decide_contig_roles_for_H_antigen(Final_list,Final_list_passed)#decide the H antigen contig is fliC or fljB |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
847 #add alignment locations, used for further selection, 03312019 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
848 for i in range(len(H_contig_roles)): |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
849 x=H_contig_roles[i] |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
850 for y in Final_list_passed: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
851 if x[1] in y[0] and y[0].startswith(x[0]): |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
852 H_contig_roles[i]+=H_contig_roles[i]+(y[-1],) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
853 break |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
854 log_file=open("SeqSero_log.txt","a") |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
855 extract_file=open("Extracted_antigen_alleles.fasta","a") |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
856 handle_fasta=list(SeqIO.parse(new_fasta,"fasta")) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
857 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
858 #print("O_contigs:") |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
859 log_file.write("O_contigs:\n") |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
860 extract_file.write("#Sequences with antigen signals (if the micro-assembled contig only covers the flanking region, it will not be used for contamination analysis)\n") |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
861 extract_file.write("#O_contigs:\n") |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
862 for x in O_nodes_roles: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
863 if "O-1,3,19_not_in_3,10" not in x[0]:#O-1,3,19_not_in_3,10 is just a small size marker |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
864 #print(x[0].split("___")[-1],x[0].split("__")[0],"blast score:",x[1],"identity%:",str(round(x[2]*100,2))+"%",str(min(x[-1]))+" to "+str(max(x[-1]))) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
865 log_file.write(x[0].split("___")[-1]+" "+x[0].split("__")[0]+"; "+"blast score: "+str(x[1])+" identity%: "+str(round(x[2]*100,2))+"%; alignment from "+str(min(x[-1]))+" to "+str(max(x[-1]))+" of antigen\n") |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
866 title=">"+x[0].split("___")[-1]+" "+x[0].split("__")[0]+"; "+"blast score: "+str(x[1])+" identity%: "+str(round(x[2]*100,2))+"%; alignment from "+str(min(x[-1]))+" to "+str(max(x[-1]))+" of antigen\n" |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
867 seqs="" |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
868 for z in handle_fasta: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
869 if x[0].split("___")[-1]==z.description: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
870 seqs=str(z.seq) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
871 extract_file.write(title+seqs+"\n") |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
872 if len(H_contig_roles)!=0: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
873 highest_H_coverage=max([float(x[1].split("_cov_")[-1].split("_")[0]) for x in H_contig_roles]) #less than highest*0.1 would be regarded as contamination and noises, they will still be considered in contamination detection and logs, but not used as final serotype output |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
874 else: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
875 highest_H_coverage=0 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
876 for x in H_contig_roles: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
877 #if multiple choices, temporately select the one with longest length for now, will revise in further change |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
878 if "fliC" == x[0] and len(x[-1])>=fliC_length and x[1] not in O_nodes and float(x[1].split("_cov_")[-1].split("_")[0])>highest_H_coverage*0.13:#remember to avoid the effect of O-type contig, so should not in O_node list |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
879 fliC_contig=x[1] |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
880 fliC_length=len(x[-1]) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
881 elif "fljB" == x[0] and len(x[-1])>=fljB_length and x[1] not in O_nodes and float(x[1].split("_cov_")[-1].split("_")[0])>highest_H_coverage*0.13: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
882 fljB_contig=x[1] |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
883 fljB_length=len(x[-1]) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
884 for x in Final_list_passed: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
885 if fliC_choice=="-" and "fliC_" in x[0] and fliC_contig in x[0]: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
886 fliC_choice=x[0].split("_")[1] |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
887 elif fljB_choice=="-" and "fljB_" in x[0] and fljB_contig in x[0]: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
888 fljB_choice=x[0].split("_")[1] |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
889 elif fliC_choice!="-" and fljB_choice!="-": |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
890 break |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
891 #now remove contigs not in middle core part |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
892 first_allele="NA" |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
893 first_allele_percentage=0 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
894 for x in Final_list: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
895 if x[0].startswith("fliC") or x[0].startswith("fljB"): |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
896 first_allele=x[0].split("__")[0] #used to filter those un-middle contigs |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
897 first_allele_percentage=x[2] |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
898 break |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
899 additional_contigs=[] |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
900 for x in Final_list: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
901 if first_allele in x[0]: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
902 if (fliC_contig == x[0].split("___")[-1]): |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
903 fliC_region=x[3] |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
904 elif fljB_contig!="NA" and (fljB_contig == x[0].split("___")[-1]): |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
905 fljB_region=x[3] |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
906 else: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
907 if x[1]*1.1>int(x[0].split("___")[1].split("_")[3]):#loose threshold by multiplying 1.1 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
908 additional_contigs.append(x) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
909 #else: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
910 #print x[:3] |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
911 #we can just use the fljB region (or fliC depends on size), no matter set() or contain a large locations (without middle part); however, if none of them is fully assembled, use 500 and 1200 as conservative cut-off |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
912 if first_allele_percentage>0.9: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
913 if len(fliC_region)>len(fljB_region) and (max(fljB_region)-min(fljB_region))>1000: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
914 target_region=fljB_region|(fliC_region-set(range(min(fljB_region),max(fljB_region)))) #fljB_region|(fliC_region-set(range(min(fljB_region),max(fljB_region)))) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
915 elif len(fliC_region)<len(fljB_region) and (max(fliC_region)-min(fliC_region))>1000: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
916 target_region=fliC_region|(fljB_region-set(range(min(fliC_region),max(fliC_region)))) #fljB_region|(fliC_region-set(range(min(fljB_region),max(fljB_region)))) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
917 else: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
918 target_region=set()#doesn't do anything |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
919 else: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
920 target_region=set()#doesn't do anything |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
921 #print(target_region) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
922 #print(additional_contigs) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
923 target_region2=set(list(range(0,525))+list(range(1200,1700)))#I found to use 500 to 1200 as special region would be best |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
924 target_region=target_region2|target_region |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
925 for x in additional_contigs: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
926 removal=0 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
927 contig_length=int(x[0].split("___")[1].split("length_")[-1].split("_")[0]) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
928 if fljB_contig not in x[0] and fliC_contig not in x[0] and len(target_region&x[3])/float(len(x[3]))>0.65 and contig_length*0.5<len(x[3])<contig_length*1.5: #consider length and alignment length for now, but very loose,0.5 and 1.5 as cut-off |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
929 removal=1 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
930 else: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
931 if first_allele_percentage > 0.9 and float(x[0].split("__")[1].split("___")[0])*x[2]/len(x[-1])>0.96:#if high similiarity with middle part of first allele (first allele >0.9, already cover middle part) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
932 removal=1 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
933 else: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
934 pass |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
935 if removal==1: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
936 for y in H_contig_roles: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
937 if y[1] in x[0]: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
938 H_contig_roles.remove(y) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
939 else: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
940 pass |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
941 #print(x[:3],contig_length,len(target_region&x[3])/float(len(x[3])),contig_length*0.5,len(x[3]),contig_length*1.5) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
942 #end of removing none-middle contigs |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
943 #print("H_contigs:") |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
944 log_file.write("H_contigs:\n") |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
945 extract_file.write("#H_contigs:\n") |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
946 H_contig_stat=[] |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
947 H1_cont_stat={} |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
948 H2_cont_stat={} |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
949 for i in range(len(H_contig_roles)): |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
950 x=H_contig_roles[i] |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
951 a=0 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
952 for y in Final_list_passed: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
953 if x[1] in y[0] and y[0].startswith(x[0]): |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
954 if "first" in y[0] or "last" in y[0]: #this is the final filter to decide it's fliC or fljB, if can't pass, then can't decide |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
955 for y in Final_list_passed: #it's impossible to has the "first" and "last" allele as prediction, so re-do it |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
956 if x[1] in y[0]:#it's very possible to be third phase allele, so no need to make it must be fliC or fljB |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
957 #print(x[1],"can't_decide_fliC_or_fljB",y[0].split("_")[1],"blast_score:",y[1],"identity%:",str(round(y[2]*100,2))+"%",str(min(y[-1]))+" to "+str(max(y[-1]))) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
958 log_file.write(x[1]+" "+x[0]+" "+y[0].split("_")[1]+"; "+"blast score: "+str(y[1])+" identity%: "+str(round(y[2]*100,2))+"%; alignment from "+str(min(y[-1]))+" to "+str(max(y[-1]))+" of antigen\n") |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
959 H_contig_roles[i]="can't decide fliC or fljB, may be third phase" |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
960 title=">"+x[1]+" "+x[0]+" "+y[0].split("_")[1]+"; "+"blast score: "+str(y[1])+" identity%: "+str(round(y[2]*100,2))+"%; alignment from "+str(min(y[-1]))+" to "+str(max(y[-1]))+" of antiten\n" |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
961 seqs="" |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
962 for z in handle_fasta: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
963 if x[1]==z.description: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
964 seqs=str(z.seq) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
965 extract_file.write(title+seqs+"\n") |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
966 break |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
967 else: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
968 #print(x[1],x[0],y[0].split("_")[1],"blast_score:",y[1],"identity%:",str(round(y[2]*100,2))+"%",str(min(y[-1]))+" to "+str(max(y[-1]))) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
969 log_file.write(x[1]+" "+x[0]+" "+y[0].split("_")[1]+"; "+"blast score: "+str(y[1])+" identity%: "+str(round(y[2]*100,2))+"%; alignment from "+str(min(y[-1]))+" to "+str(max(y[-1]))+" of antigen\n") |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
970 title=">"+x[1]+" "+x[0]+" "+y[0].split("_")[1]+"; "+"blast score: "+str(y[1])+" identity%: "+str(round(y[2]*100,2))+"%; alignment from "+str(min(y[-1]))+" to "+str(max(y[-1]))+" of antigen\n" |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
971 seqs="" |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
972 for z in handle_fasta: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
973 if x[1]==z.description: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
974 seqs=str(z.seq) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
975 extract_file.write(title+seqs+"\n") |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
976 if x[0]=="fliC": |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
977 if y[0].split("_")[1] not in H1_cont_stat: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
978 H1_cont_stat[y[0].split("_")[1]]=y[2] |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
979 else: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
980 H1_cont_stat[y[0].split("_")[1]]+=y[2] |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
981 if x[0]=="fljB": |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
982 if y[0].split("_")[1] not in H2_cont_stat: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
983 H2_cont_stat[y[0].split("_")[1]]=y[2] |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
984 else: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
985 H2_cont_stat[y[0].split("_")[1]]+=y[2] |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
986 break |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
987 #detect contaminations |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
988 #print(H1_cont_stat) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
989 #print(H2_cont_stat) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
990 H1_cont_stat_list=[x for x in H1_cont_stat if H1_cont_stat[x]>0.2] |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
991 H2_cont_stat_list=[x for x in H2_cont_stat if H2_cont_stat[x]>0.2] |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
992 contamination_H="" |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
993 if len(H1_cont_stat_list)>1 or len(H2_cont_stat_list)>1: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
994 contamination_H="potential contamination from H antigen signals" |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
995 elif len(H2_cont_stat_list)==1 and fljB_contig=="NA": |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
996 contamination_H="potential contamination from H antigen signals, uncommon weak fljB signals detected" |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
997 #get additional antigens |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
998 """ |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
999 if ("O-9,46_wbaV" in O_list or "O-9,46_wbaV-from-II-9,12:z29:1,5-SRR1346254" in O_list) and O_list_less_contamination[0].startswith("O-9,"):#not sure should use and float(O9_wbaV)/float(num_1) > 0.1 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1000 if "O-9,46_wzy" in O_list:#and float(O946_wzy)/float(num_1) > 0.1 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1001 O_choice="O-9,46" |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1002 #print "$$$Most possilble Otype: O-9,46" |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1003 elif "O-9,46,27_partial_wzy" in O_list:#and float(O94627)/float(num_1) > 0.1 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1004 O_choice="O-9,46,27" |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1005 #print "$$$Most possilble Otype: O-9,46,27" |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1006 elif ("O-3,10_wzx" in O_list) and ("O-9,46_wzy" in O_list) and (O_list[0].startswith("O-3,10") or O_list_less_contamination[0].startswith("O-9,46_wzy")):#and float(O310_wzx)/float(num_1) > 0.1 and float(O946_wzy)/float(num_1) > 0.1 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1007 if "O-3,10_not_in_1,3,19" in O_list:#and float(O310_no_1319)/float(num_1) > 0.1 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1008 O_choice="O-3,10" |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1009 #print "$$$Most possilble Otype: O-3,10 (contain O-3,10_not_in_1,3,19)" |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1010 else: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1011 O_choice="O-1,3,19" |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1012 #print "$$$Most possilble Otype: O-1,3,19 (not contain O-3,10_not_in_1,3,19)" |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1013 ### end of special test for O9,46 and O3,10 family |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1014 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1015 if O_choice=="O-9,46,27" or O_choice=="O-3,10" or O_choice=="O-1,3,19": |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1016 if len(Otypes_uniq)>2: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1017 contamination_O="potential contamination from O antigen signals" |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1018 else: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1019 if len(Otypes_uniq)>1: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1020 if O_choice=="O-4" and len(Otypes_uniq)==2 and "O-9,46,27" in Otypes_uniq: #for special 4,12,27 case such as Bredeney and Schwarzengrund |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1021 contamination_O="" |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1022 elif O_choice=="O-9,46" and len(Otypes_uniq)==2 and "O-9,46_wbaV" in Otypes_uniq and "O-9,46_wzy" in Otypes_uniq: #for special 4,12,27 case such as Bredeney and Schwarzengrund |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1023 contamination_O="" |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1024 """ |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1025 additonal_antigents=[] |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1026 #print(contamination_O) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1027 #print(contamination_H) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1028 log_file.write(contamination_O+"\n") |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1029 log_file.write(contamination_H+"\n") |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1030 log_file.close() |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1031 return O_choice,fliC_choice,fljB_choice,special_gene_list,contamination_O,contamination_H,Otypes_uniq,H1_cont_stat_list,H2_cont_stat_list |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1032 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
def get_input_K(input_file,lib_dict,data_type,k_size):
    """Run the kmerizer matching ``data_type`` on ``input_file`` (k-mer mode).

    Every k-mer listed in ``lib_dict`` is pooled into one target set, which
    is handed to the kmerizer together with ``input_file`` and ``k_size``.

    Args:
        input_file: input handed unchanged to the selected kmerizer.
        lib_dict: mapping of library entry name -> iterable of k-mers.
        data_type: input type code as a string:
            '4' -> target_multifasta_kmerizer,
            '1', '2', '3' -> target_read_kmerizer,
            '5' -> minion_fasta_kmerizer,
            '6' -> minion_fastq_kmerizer.
        k_size: k-mer size forwarded to the kmerizer.

    Returns:
        Whatever the selected kmerizer returns.

    Raises:
        ValueError: if ``data_type`` is none of the codes above (this used
            to surface as an UnboundLocalError on the return statement).
    """
    # Pool the library k-mers once; duplicates across entries collapse here.
    target_kmers = set()
    for h in lib_dict:
        target_kmers.update(lib_dict[h])

    if data_type == '4':
        return target_multifasta_kmerizer(input_file, k_size, target_kmers)
    if data_type in ('1', '2', '3'):  # set it for now, will change later
        return target_read_kmerizer(input_file, k_size, target_kmers)
    if data_type == '5':  # minion_2d_fasta
        return minion_fasta_kmerizer(input_file, k_size, target_kmers)
    if data_type == '6':  # minion_2d_fastq
        return minion_fastq_kmerizer(input_file, k_size, target_kmers)
    raise ValueError("unsupported data_type for k-mer mode: %r" % (data_type,))
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1047 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
def get_kmer_dict(lib_dict,input_Ks):
    """Score every library entry against the input k-mers (k-mer mode).

    The score of an entry is the percentage of its k-mers that are present
    in ``input_Ks``. Entries are then sorted into three dictionaries by the
    prefix of their name.

    Args:
        lib_dict: mapping of library entry name -> set of k-mers.
        input_Ks: set of k-mers found in the input.

    Returns:
        Tuple ``(O_dict, H_dict, Special_dict)``, each mapping entry name to
        its score:
          * names starting with 'O-' are kept in O_dict when score > 25,
          * names starting with 'fl' are kept in H_dict when score > 40,
          * every other name is kept in Special_dict when score > 1.
    """
    O_dict = {}
    H_dict = {}
    Special_dict = {}
    for name, lib_kmers in lib_dict.items():
        if not lib_kmers:
            # An entry without k-mers cannot be scored; skipping it avoids a
            # ZeroDivisionError that would abort the whole run.
            continue
        score = (len(lib_kmers & input_Ks) / len(lib_kmers)) * 100
        # Arbitrary cut-off for similarity score very low but seems
        # necessary to detect O-3,10 in some cases
        if score <= 1:
            continue
        if name.startswith('O-'):
            if score > 25:
                O_dict[name] = score
        elif name.startswith('fl'):
            if score > 40:
                H_dict[name] = score
        else:
            Special_dict[name] = score
    return O_dict,H_dict,Special_dict
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1063 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
# Library entry names used by the special O-9 / O-9,46 / O-3,10 tests.
_O9_WBAV_MARKERS = (
    'O-9,46_wbaV__1002',
    'O-9,46_wbaV-from-II-9,12:z29:1,5-SRR1346254__1002',
)
_O946_WZY_MARKERS = (
    'O-9,46_wzy__1191',
    'O-9,46_wzy_partial__216',
)


def _highest_scoring_O(O_dict, skip=None):
    """Return the O call of the best-scoring entry in O_dict ('-' if none).

    Entries are visited in dict order and a later entry wins a tie (``>=``).
    The entry named ``skip`` is ignored. A wbaV marker that wins while no
    O-9,46 wzy marker is present in O_dict is reported as "O-9".
    """
    best_call = '-'
    max_score = 0
    has_wzy = any(marker in O_dict for marker in _O946_WZY_MARKERS)
    for x in O_dict:
        if x == skip:
            continue
        score = float(O_dict[x])
        if score >= max_score:
            max_score = score
            # ed_SL_12182019: modified to fix the O-9,46 error example1
            if x in _O9_WBAV_MARKERS and not has_wzy:
                best_call = "O-9"
            else:
                best_call = x.split("_")[0]
    return best_call


def _call_O(O_dict, Special_dict):
    """Derive the O antigen call from the O scores and the special scores."""
    if not O_dict:
        return '-'
    if any(m in O_dict and O_dict[m] > 70 for m in _O9_WBAV_MARKERS):
        # modified to fix miscall of O-9,46
        if any(m in O_dict and O_dict[m] > 40 for m in _O946_WZY_MARKERS):
            return "O-9,46"
        if "O-9,46,27_partial_wzy__1019" in O_dict:
            return "O-9,46,27"
        # next, detect O9 vs O2: compare the two "tyr" special scores
        O2 = 0
        O9 = 0
        for z in Special_dict:
            if "tyr-O-9" in z:
                O9 = float(Special_dict[z])
            if "tyr-O-2" in z:
                O2 = float(Special_dict[z])
        return "O-2" if O2 > O9 else "O-9"
    if ("O-3,10_wzx__1539" in O_dict) and ("O-9,46_wzy__1191" in O_dict):
        if "O-3,10_not_in_1,3,19__1519" in O_dict:
            return "O-3,10"
        return "O-1,3,19"
    ### end of special test for O9,46 and O3,10 family
    try:
        highest_O = _highest_scoring_O(O_dict)
        if highest_O == "O-1,3,19":
            # Outside the O-3,10 family test this call is not accepted:
            # pick again while ignoring the O-1,3,19-specific entry.
            highest_O = _highest_scoring_O(O_dict, skip='O-1,3,19_not_in_3,10__130')
    except (TypeError, ValueError, AttributeError):
        # Best effort: malformed scores or names leave the O antigen uncalled.
        return '-'
    return highest_O


def _call_H(H_dict):
    """Return (fliC call, fljB call) from the H scores ('-' when uncalled)."""
    # used to detect whether fljB existed or not
    highest_H_score_both_BC = max(H_dict.values()) if H_dict else 0

    highest_fliC = '-'
    highest_Score = 0
    for s in H_dict:
        if s.startswith('fliC') and float(H_dict[s]) > highest_Score:
            highest_fliC = s.split('_')[1]
            highest_Score = float(H_dict[s])

    highest_fljB = '-'
    highest_Score = 0
    for s in H_dict:
        if not s.startswith('fljB'):
            continue
        score = float(H_dict[s])
        # fljB is special, so use highest_H_score_both_BC to give a general
        # estimate of coverage, currently 0.65 seems pretty good; the reason
        # use a high (0.65) is some fliC and fljB shared with each other
        if score > highest_Score and score > highest_H_score_both_BC * 0.65:
            # an fljB allele equal to the fliC call is never reported
            if s.split('_')[1] != highest_fliC:
                highest_fljB = s.split('_')[1]
                highest_Score = score
    return highest_fliC, highest_fljB


def call_O_and_H_type(O_dict,H_dict,Special_dict,make_dir):
    """Call the O, fliC and fljB types from k-mer scores and log the scores.

    All scores are appended to "SeqSero_log.txt" in the current working
    directory; the file is closed even if an error occurs while calling.

    Args:
        O_dict: mapping of O entry name -> score.
        H_dict: mapping of fliC/fljB entry name -> score.
        Special_dict: mapping of other entry name -> score.
        make_dir: not used by this function; kept so callers need no change.

    Returns:
        Tuple ``(highest_O, highest_fliC, highest_fljB)`` of strings; each is
        '-' when no call could be made.
    """
    with open("SeqSero_log.txt","a") as log_file:
        log_file.write("O_scores:\n")
        for x in O_dict:
            log_file.write(x+"\t"+str(O_dict[x])+"\n")
        #call O:
        highest_O = _call_O(O_dict, Special_dict)

        log_file.write("\nH_scores:\n")
        for s in H_dict:
            log_file.write(s+"\t"+str(H_dict[s])+"\n")
        #call_fliC and call_fljB:
        highest_fliC, highest_fljB = _call_H(H_dict)

        log_file.write("\nSpecial_scores:\n")
        for s in Special_dict:
            log_file.write(s+"\t"+str(Special_dict[s])+"\n")
    return highest_O,highest_fliC,highest_fljB
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1164 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
def get_temp_file_names(for_fq,rev_fq):
    """Build the temporary file names for the "seqsero2 -a" workflow.

    Each name is one of the two read-file names with a fixed suffix appended.

    Args:
        for_fq: forward read file name; base of every name except the two
            derived from ``rev_fq``.
        rev_fq: reverse read file name; base of ``mapped_fq2`` and ``rev_sai``.

    Returns:
        Tuple ``(sam, bam, sorted_bam, mapped_fq1, mapped_fq2, combined_fq,
        for_sai, rev_sai)``.
    """
    forward_suffixes = (".sam", ".bam", "_sorted.bam", "_mapped.fq", "_combined.fq", ".sai")
    sam, bam, sorted_bam, mapped_fq1, combined_fq, for_sai = [
        for_fq + suffix for suffix in forward_suffixes
    ]
    mapped_fq2, rev_sai = [rev_fq + suffix for suffix in ("_mapped.fq", ".sai")]
    return sam,bam,sorted_bam,mapped_fq1,mapped_fq2,combined_fq,for_sai,rev_sai
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1176 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
def map_and_sort(threads,database,fnameA,fnameB,sam,bam,for_sai,rev_sai,sorted_bam,mapping_mode):
    """Index the database, map the reads with bwa, then filter and name-sort with samtools.

    seqsero2 -a; do mapping and sort.

    Args:
        threads: thread count as a string (it is concatenated into the commands).
        database: fasta file passed to ``bwa index`` and used as mapping reference.
        fnameA: forward (or only) reads file.
        fnameB: reverse reads file; "" selects the single-end path in "sam" mode.
        sam: path of the alignment written by bwa.
        bam: path of the ``samtools view -F 4`` output.
        for_sai: ``bwa aln`` output for fnameA ("sam" mode only).
        rev_sai: ``bwa aln`` output for fnameB ("sam" mode, paired only).
        sorted_bam: path of the name-sorted output.
        mapping_mode: "mem" (bwa mem) or "sam" (bwa aln + sampe/samse). Any
            other value runs no mapping step at all.

    Raises:
        subprocess.CalledProcessError: if any external command exits non-zero.
        IndexError: if no "ersion:" marker is found in the samtools banner.
    """
    log_err = " 2>> data_log.txt"  # stderr of the bwa steps is appended to the run log
    print("building database...")
    subprocess.check_call("bwa index "+database+log_err,shell=True)
    print("mapping...")
    if mapping_mode=="mem":
        subprocess.check_call("bwa mem -k 17 -t "+threads+" "+database+" "+fnameA+" "+fnameB+" > "+sam+log_err,shell=True)
    elif mapping_mode=="sam":
        if fnameB!="":
            subprocess.check_call("bwa aln -t "+threads+" "+database+" "+fnameA+" > "+for_sai+log_err,shell=True)
            subprocess.check_call("bwa aln -t "+threads+" "+database+" "+fnameB+" > "+rev_sai+log_err,shell=True)
            subprocess.check_call("bwa sampe "+database+" "+for_sai+" "+ rev_sai+" "+fnameA+" "+fnameB+" > "+sam+log_err,shell=True)
        else:
            subprocess.check_call("bwa aln -t "+threads+" "+database+" "+fnameA+" > "+for_sai+log_err,shell=True)
            # Fixed: this call used the undefined name `for_fq` (NameError) and
            # lacked shell=True, which the ">" redirection requires.
            subprocess.check_call("bwa samse "+database+" "+for_sai+" "+fnameA+" > "+sam+log_err,shell=True)
    subprocess.check_call("samtools view -@ "+threads+" -F 4 -Sh "+sam+" > "+bam,shell=True)
    ### check the version of samtools, then use different sort commands
    # Running bare `samtools` prints its banner (including "Version: x.y") on stderr.
    samtools_version=subprocess.Popen(["samtools"],stdout=subprocess.PIPE,stderr=subprocess.PIPE)
    out, err = samtools_version.communicate()
    # Decode instead of str(bytes): the bytes repr keeps "\n" as literal
    # backslash-n, which could end up glued to the version token.
    banner = err.decode("utf-8", "replace")
    version = banner.split("ersion:")[1].split()[0]
    print("check samtools version:",version)
    ### end of samtools version check and its analysis
    if LooseVersion(version)<=LooseVersion("1.2"):
        # legacy syntax: samtools sort <in.bam> <out.prefix>
        subprocess.check_call("samtools sort -@ "+threads+" -n "+bam+" "+fnameA+"_sorted",shell=True)
    else:
        subprocess.check_call("samtools sort -@ "+threads+" -n "+bam+" >"+sorted_bam,shell=True)
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1203 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
def extract_mapped_reads_and_do_assembly_and_blast(current_time,sorted_bam,combined_fq,mapped_fq1,mapped_fq2,threads,fnameA,fnameB,database,mapping_mode):
    """Pull mapped reads out of the sorted BAM, assemble them with SPAdes and blast the database against the contigs.

    seqsero2 -a; extract, assembly and blast.

    Steps: ``bamToFastq`` writes ``combined_fq`` (and ``mapped_fq1``/``mapped_fq2``
    when ``fnameB`` is not ""); if the extracted files are larger than 100 bytes,
    ``spades.py`` assembles into ``<current_time>_temp``, contigs.fasta is moved to
    ``<fnameA>_<database>_<mapping_mode>.fasta``, and ``makeblastdb``/``blastn``
    write "blasted_output.xml".

    Returns:
        (xmlfile, new_fasta): the blast XML file name and the contigs fasta name.

    Raises:
        subprocess.CalledProcessError: if any external command exits non-zero.
        UnboundLocalError: when the extracted reads are too small to assemble;
            ``new_fasta`` is never assigned on that path. main() catches this
            and treats the result as xmlfile="NA".
    """
    paired = fnameB != ""
    to_log = " >> data_log.txt 2>&1"

    subprocess.check_call("bamToFastq -i " + sorted_bam + " -fq " + combined_fq, shell=True)
    if paired:
        # 2> /dev/null instead of the log file if no output is wanted
        subprocess.check_call(
            "bamToFastq -i " + sorted_bam + " -fq " + mapped_fq1 + " -fq2 " + mapped_fq2 + " 2>> data_log.txt",
            shell=True)

    outdir = current_time + "_temp"
    print("assembling...")
    # SPAdes is capped at 4 threads.
    t = "4" if int(threads) > 4 else threads

    # If this fails the serotype ends up as "-:-:-".
    if os.path.getsize(combined_fq) > 100 and (not paired or os.path.getsize(mapped_fq1) > 100):
        reads_args = "--pe1-s " + combined_fq
        if paired:
            reads_args = reads_args + " --pe1-1 " + mapped_fq1 + " --pe1-2 " + mapped_fq2
        subprocess.check_call(
            "spades.py --careful " + reads_args + " -t " + t + " -o " + outdir + to_log,
            shell=True)

        new_fasta = fnameA + "_" + database + "_" + mapping_mode + ".fasta"
        subprocess.check_call("mv " + outdir + "/contigs.fasta " + new_fasta + " 2> /dev/null", shell=True)
        subprocess.check_call("rm -rf " + outdir + " 2> /dev/null", shell=True)

        print("blasting...", "\n")
        xmlfile = "blasted_output.xml"
        # Tool output goes to the log so it does not interrupt the program's own output.
        subprocess.check_call(
            "makeblastdb -in " + new_fasta + " -out " + new_fasta + "_db " + "-dbtype nucl" + to_log,
            shell=True)
        # The database sequences are the query; the assembled contigs are the subject db.
        subprocess.check_call(
            "blastn -query " + database + " -db " + new_fasta + "_db -out " + xmlfile + " -outfmt 5" + to_log,
            shell=True)
    else:
        xmlfile = "NA"
    # NOTE: on the "NA" path new_fasta is unbound, so this line raises
    # UnboundLocalError; the caller in main() relies on catching it.
    return xmlfile, new_fasta
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1236 |
7
aa54a94b9aeb
planemo upload commit c50df40caef2fb97c178d6890961e0e527992324-dirty
cstrittmatter
parents:
0
diff
changeset
|
def judge_subspecies(fnameA,dirpath):
    """Run SalmID on the forward reads and turn its score table into a subspecies call.

    seqsero2 -a; judge subspecies on just forward raw reads fastq.

    The command ``python <dirpath>/../SalmID.py -i <fnameA>`` is executed; its
    stdout is echoed, appended to data_log.txt and parsed as a tab-separated
    table whose first line is the header and second line holds the scores.
    Columns 6 onward are the per-subspecies scores; columns 4 and 5 are
    compared as bongori vs enterica.

    Args:
        fnameA: forward reads file handed to SalmID.
        dirpath: directory whose parent contains SalmID.py.

    Returns:
        The name taken from the best-scoring header column, "bongori" when the
        bongori score is above 10 and above the enterica score, or "-" when
        the best subspecies score is below 60.

    Raises:
        IndexError / ValueError: if the SalmID output is not the expected table.
    """
    samid_strcmd = "python " + dirpath + "/../SalmID.py -i "+fnameA
    print(samid_strcmd)
    salmID_output=subprocess.Popen(samid_strcmd,shell=True,stdout=subprocess.PIPE,stderr=subprocess.PIPE)
    out, err = salmID_output.communicate()
    out=out.decode("utf-8")
    print(out)
    print(err)
    # Context manager so the log handle is closed even if the write fails.
    with open("data_log.txt","a") as log_file:
        log_file.write(out)

    # Parse the table once instead of re-splitting the output for every field.
    rows = out.split("\n")
    score_row = rows[1].split("\t")
    header_row = rows[0].split("\t")
    salm_species_scores = score_row[6:]
    salm_species_results = header_row[6:]

    max_score=0
    max_score_index=1 #default is 1, means "I"
    for i, raw_score in enumerate(salm_species_scores):
        score = float(raw_score)
        if max_score<score:
            max_score=score
            max_score_index=i
    prediction=salm_species_results[max_score_index].split(".")[1].strip().split(" ")[0]

    ## ed_SL_0318: change SalmID_ssp_threshold
    bongori_score = float(score_row[4])
    # The enterica column is only converted when the first test passes, as before.
    if bongori_score > 10 and bongori_score > float(score_row[5]):
        prediction="bongori" #if not, the prediction would always be enterica, since they are located in the later part
    if max_score<60:
        prediction="-"
    return prediction
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1267 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
def judge_subspecies_Kmer(Special_dict):
    """Pick the subspecies from the k-mer marker scores (seqsero2 -k).

    Only keys containing "mer" whose score is above 60 are considered
    (ed_SL_0318: change ssp_threshold). The subspecies name is the text after
    the last "_" in the key. A "bongori" marker scoring above 95 wins
    immediately; otherwise the highest-scoring marker is returned.

    Args:
        Special_dict: mapping of marker name to a score convertible to float.

    Returns:
        The predicted subspecies name, or "-" when no marker qualifies.
    """
    best_score = 0
    call = "-"
    for marker in Special_dict:
        if "mer" not in marker:
            continue
        score = float(Special_dict[marker])
        if not score > 60:
            continue
        subspecies = marker.split("_")[-1].strip()
        if best_score < score:
            best_score, call = score, subspecies
        # if bongori already, then no need to test enterica
        if subspecies == "bongori" and score > 95:
            call = "bongori"
            break
    return call
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1282 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1283 ## ed_SL_11232019: add notes for missing antigen |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
def check_antigens(ssp,O_antigen,H1_antigen,H2_antigen,NA_note):
    """Build the explanatory note for missing antigens or an unidentified subspecies.

    An antigen counts as missing when its value is '-'.

    Args:
        ssp: subspecies prediction; '-' means none was identified.
        O_antigen: O antigen call.
        H1_antigen: phase 1 H antigen call.
        H2_antigen: phase 2 H antigen call.
        NA_note: note passed in by the caller; returned unchanged only when
            no antigen note applies.

    Returns:
        (antigen_note, NA_note). Whenever an antigen note is produced the
        returned NA_note is ''. For the patterns not listed below (O and H1
        both present, or only H2 present) the result is ('', NA_note).
    """
    if ssp == '-':
        return ('The input genome cannot be identified as Salmonella. Check the input for taxonomic ID, contamination, or sequencing quality. ', '')

    has_O = O_antigen != '-'
    has_H1 = H1_antigen != '-'
    has_H2 = H2_antigen != '-'

    if has_O and not has_H1:
        if has_H2:
            # O:-:H2
            message = 'fliC was not detected. This is an atypical result that should be further investigated. Most Salmonella strains have fliC, encoding the Phase 1 H antigen, even if it is not expressed. '
        else:
            # O:-:-
            message = 'H antigens were not detected. This is an atypical result that should be further investigated. Most Salmonella strains have at least fliC, encoding the Phase 1 H antigen, even if it is not expressed. '
    elif not has_O and has_H1:
        # -:H1:X
        message = 'O antigen was not detected. This result may be due to a rough strain that has deleted the rfb region. For raw reads input, the k-mer workflow is sometimes more sensitive than the microassembly workflow in detecting O antigen. Caution should be used with this approach because the k-mer result may be due to low levels of contamination. '
    elif not (has_O or has_H1 or has_H2):
        # -:-:-
        message = 'No serotype antigens were detected. This is an atypical result that should be further investigated. '
    else:
        # Nothing to report: keep the caller's NA_note untouched.
        return ('', NA_note)
    return (message, '')
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1307 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1308 def main(): |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1309 #combine SeqSeroK and SeqSero2, also with SalmID |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1310 args = parse_args() |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1311 input_file = args.i |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1312 data_type = args.t |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1313 analysis_mode = args.m |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1314 mapping_mode=args.b |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1315 threads=args.p |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1316 make_dir=args.d |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1317 clean_mode=args.c |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1318 sample_name=args.n |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1319 ingore_header=args.s |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1320 k_size=27 #will change for bug fixing |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1321 dirpath = os.path.abspath(os.path.dirname(os.path.realpath(__file__))) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1322 ex_dir = os.path.abspath(os.path.join(os.path.dirname(os.path.dirname(__file__)),'seqsero2_db')) # ed_SL_09152019: add ex_dir for packaging |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1323 seqsero2_db=ex_dir+"/H_and_O_and_specific_genes.fasta" # ed_SL_11092019: change path to database for packaging |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1324 database="H_and_O_and_specific_genes.fasta" |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1325 note="Note: " |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1326 NA_note="This predicted serotype is not in the Kauffman-White scheme. " # ed_SL_09272019: add for new output format |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1327 if len(sys.argv)==1: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1328 subprocess.check_call(dirpath+"/SeqSero2_package.py -h",shell=True)#change name of python file |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1329 else: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1330 request_id = time.strftime("%m_%d_%Y_%H_%M_%S", time.localtime()) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1331 request_id += str(random.randint(1, 10000000)) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1332 if make_dir is None: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1333 make_dir="SeqSero_result_"+request_id |
8
357e38526e2a
planemo upload commit c50df40caef2fb97c178d6890961e0e527992324-dirty
cstrittmatter
parents:
7
diff
changeset
|
1334 make_dir=os.path.abspath(make_dir) |
0
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1335 if os.path.isdir(make_dir): |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1336 pass |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1337 else: |
8
357e38526e2a
planemo upload commit c50df40caef2fb97c178d6890961e0e527992324-dirty
cstrittmatter
parents:
7
diff
changeset
|
1338 subprocess.check_call("mkdir -p "+make_dir,shell=True) |
0
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1339 #subprocess.check_call("cp "+dirpath+"/"+database+" "+" ".join(input_file)+" "+make_dir,shell=True) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1340 #subprocess.check_call("ln -sr "+dirpath+"/"+database+" "+" ".join(input_file)+" "+make_dir,shell=True) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1341 subprocess.check_call("ln -f -s "+seqsero2_db+" "+" ".join(input_file)+" "+make_dir,shell=True) # ed_SL_11092019: change path to database for packaging |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1342 #subprocess.check_call("ln -f -s "+dirpath+"/"+database+" "+" ".join(input_file)+" "+make_dir,shell=True) ### use -f option to force the replacement of links, remove -r and use absolute path instead to avoid link issue (use 'type=os.path.abspath' in -i argument). |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1343 ############################begin the real analysis |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1344 if analysis_mode=="a": |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1345 if data_type in ["1","2","3"]:#use allele mode |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1346 for_fq,rev_fq=get_input_files(make_dir,input_file,data_type,dirpath) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1347 os.chdir(make_dir) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1348 ###add a function to tell input files |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1349 fnameA=for_fq.split("/")[-1] |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1350 fnameB=rev_fq.split("/")[-1] |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1351 current_time=time.strftime("%Y_%m_%d_%H_%M_%S", time.localtime()) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1352 sam,bam,sorted_bam,mapped_fq1,mapped_fq2,combined_fq,for_sai,rev_sai=get_temp_file_names(fnameA,fnameB) #get temp files id |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1353 map_and_sort(threads,database,fnameA,fnameB,sam,bam,for_sai,rev_sai,sorted_bam,mapping_mode) #do mapping and sort |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1354 ### avoid error out when micro assembly fails. ed_SL_03172020 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1355 try: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1356 xmlfile,new_fasta=extract_mapped_reads_and_do_assembly_and_blast(current_time,sorted_bam,combined_fq,mapped_fq1,mapped_fq2,threads,fnameA,fnameB,database,mapping_mode) #extract the mapped reads and do micro assembly and blast |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1357 except (UnboundLocalError, subprocess.CalledProcessError): |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1358 xmlfile="NA" |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1359 H1_cont_stat_list=[] |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1360 H2_cont_stat_list=[] |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1361 ### |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1362 if xmlfile=="NA": |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1363 O_choice,fliC_choice,fljB_choice,special_gene_list,contamination_O,contamination_H=("-","-","-",[],"","") |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1364 else: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1365 Final_list=xml_parse_score_comparision_seqsero(xmlfile) #analyze xml and get parsed results |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1366 file=open("data_log.txt","a") |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1367 for x in Final_list: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1368 file.write("\t".join(str(y) for y in x)+"\n") |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1369 file.close() |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1370 Final_list_passed=[x for x in Final_list if float(x[0].split("_cov_")[1].split("_")[0])>=0.9 and (x[1]>=int(x[0].split("__")[1]) or x[1]>=int(x[0].split("___")[1].split("_")[3]) or x[1]>1000)] |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1371 O_choice,fliC_choice,fljB_choice,special_gene_list,contamination_O,contamination_H,Otypes_uniq,H1_cont_stat_list,H2_cont_stat_list=predict_O_and_H_types(Final_list,Final_list_passed,new_fasta) #predict O, fliC and fljB |
7
aa54a94b9aeb
planemo upload commit c50df40caef2fb97c178d6890961e0e527992324-dirty
cstrittmatter
parents:
0
diff
changeset
|
1372 subspecies=judge_subspecies(fnameA,dirpath) #predict subspecies |
0
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1373 ###output |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1374 predict_form,predict_sero,star,star_line,claim=seqsero_from_formula_to_serotypes(O_choice,fliC_choice,fljB_choice,special_gene_list,subspecies) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1375 claim="" #04132019, disable claim for new report requirement |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1376 contamination_report="" |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1377 H_list=["fliC_"+x for x in H1_cont_stat_list if len(x)>0]+["fljB_"+x for x in H2_cont_stat_list if len(x)>0] |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1378 if contamination_O!="" and contamination_H=="": |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1379 contamination_report="#Potential inter-serotype contamination detected from O antigen signals. All O-antigens detected:"+"\t".join(Otypes_uniq)+"." |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1380 elif contamination_O=="" and contamination_H!="": |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1381 contamination_report="#Potential inter-serotype contamination detected or potential thrid H phase from H antigen signals. All H-antigens detected:"+"\t".join(H_list)+"." |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1382 elif contamination_O!="" and contamination_H!="": |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1383 contamination_report="#Potential inter-serotype contamination detected from both O and H antigen signals.All O-antigens detected:"+"\t".join(Otypes_uniq)+". All H-antigens detected:"+"\t".join(H_list)+"." |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1384 if contamination_report!="": |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1385 #contamination_report="potential inter-serotype contamination detected (please refer below antigen signal report for details)." #above contamination_reports are for back-up and bug fixing #web-based mode need to be re-used, 04132019 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1386 contamination_report="Co-existence of multiple serotypes detected, indicating potential inter-serotype contamination. See 'Extracted_antigen_alleles.fasta' for detected serotype determinant alleles. " |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1387 #claim="\n"+open("Extracted_antigen_alleles.fasta","r").read()#used to store H and O antigen sequences #04132019, need to change if using web-version |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1388 #if contamination_report+star_line+claim=="": #0413, new output style |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1389 # note="" |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1390 #else: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1391 # note="Note:" |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1392 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1393 ### ed_SL_11232019: add notes for missing antigen |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1394 if O_choice=="": |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1395 O_choice="-" |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1396 antigen_note,NA_note=check_antigens(subspecies,O_choice,fliC_choice,fljB_choice,NA_note) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1397 if sample_name: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1398 print ("Sample name:\t"+sample_name) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1399 ### |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1400 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1401 if clean_mode: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1402 subprocess.check_call("rm -rf ../"+make_dir,shell=True) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1403 make_dir="none-output-directory due to '-c' flag" |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1404 else: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1405 new_file=open("SeqSero_result.txt","w") |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1406 ### ed_SL_01152020: add new output |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1407 conta_note="yes" if "inter-serotype contamination" in contamination_report else "no" |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1408 tsv_file=open("SeqSero_result.tsv","w") |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1409 if ingore_header: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1410 pass |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1411 else: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1412 tsv_file.write("Sample name\tOutput directory\tInput files\tO antigen prediction\tH1 antigen prediction(fliC)\tH2 antigen prediction(fljB)\tPredicted subspecies\tPredicted antigenic profile\tPredicted serotype\tPotential inter-serotype contamination\tNote\n") |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1413 if sample_name: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1414 new_file.write("Sample name:\t"+sample_name+"\n") |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1415 tsv_file.write(sample_name+'\t') |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1416 else: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1417 tsv_file.write(input_file[0].split('/')[-1]+'\t') |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1418 ### |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1419 if "N/A" not in predict_sero: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1420 new_file.write("Output directory:\t"+make_dir+"\n"+ |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1421 "Input files:\t"+"\t".join(input_file)+"\n"+ |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1422 "O antigen prediction:\t"+O_choice+"\n"+ |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1423 "H1 antigen prediction(fliC):\t"+fliC_choice+"\n"+ |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1424 "H2 antigen prediction(fljB):\t"+fljB_choice+"\n"+ |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1425 "Predicted subspecies:\t"+subspecies+"\n"+ |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1426 "Predicted antigenic profile:\t"+predict_form+"\n"+ |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1427 "Predicted serotype:\t"+predict_sero+"\n"+ |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1428 note+contamination_report+star_line+claim+antigen_note+"\n")#+## |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1429 tsv_file.write(make_dir+"\t"+" ".join(input_file)+"\t"+O_choice+"\t"+fliC_choice+"\t"+fljB_choice+"\t"+subspecies+"\t"+predict_form+"\t"+predict_sero+"\t"+conta_note+"\t"+contamination_report+star_line+claim+antigen_note+"\n") |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1430 else: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1431 #star_line=star_line.strip()+"\tNone such antigenic formula in KW.\n" |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1432 star_line="" #04132019, for new output requirement, disable star_line if "NA" in output |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1433 new_file.write("Output directory:\t"+make_dir+"\n"+ |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1434 "Input files:\t"+"\t".join(input_file)+"\n"+ |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1435 "O antigen prediction:\t"+O_choice+"\n"+ |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1436 "H1 antigen prediction(fliC):\t"+fliC_choice+"\n"+ |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1437 "H2 antigen prediction(fljB):\t"+fljB_choice+"\n"+ |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1438 "Predicted subspecies:\t"+subspecies+"\n"+ |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1439 "Predicted antigenic profile:\t"+predict_form+"\n"+ |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1440 "Predicted serotype:\t"+subspecies+' '+predict_form+"\n"+ # add serotype output for "N/A" prediction, add subspecies |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1441 note+NA_note+contamination_report+star_line+claim+antigen_note+"\n")#+## |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1442 tsv_file.write(make_dir+"\t"+" ".join(input_file)+"\t"+O_choice+"\t"+fliC_choice+"\t"+fljB_choice+"\t"+subspecies+"\t"+predict_form+"\t"+subspecies+' '+predict_form+"\t"+conta_note+"\t"+NA_note+contamination_report+star_line+claim+antigen_note+"\n") |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1443 new_file.close() |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1444 tsv_file.close() |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1445 #subprocess.check_call("cat Seqsero_result.txt",shell=True) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1446 #subprocess.call("rm H_and_O_and_specific_genes.fasta* *.sra *.bam *.sam *.fastq *.gz *.fq temp.txt *.xml "+fnameA+"*_db* 2> /dev/null",shell=True) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1447 subprocess.call("rm H_and_O_and_specific_genes.fasta* *.sra *.bam *.sam *.fastq *.gz *.fq temp.txt "+fnameA+"*_db* 2> /dev/null",shell=True) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1448 if "N/A" not in predict_sero: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1449 #print("Output_directory:"+make_dir+"\nInput files:\t"+for_fq+" "+rev_fq+"\n"+"O antigen prediction:\t"+O_choice+"\n"+"H1 antigen prediction(fliC):\t"+fliC_choice+"\n"+"H2 antigen prediction(fljB):\t"+fljB_choice+"\n"+"Predicted antigenic profile:\t"+predict_form+"\n"+"Predicted subspecies:\t"+subspecies+"\n"+"Predicted serotype(s):\t"+predict_sero+star+"\nNote:"+contamination_report+star+star_line+claim+"\n")#+## |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1450 print("Output directory:\t"+make_dir+"\n"+ |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1451 "Input files:\t"+"\t".join(input_file)+"\n"+ |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1452 "O antigen prediction:\t"+O_choice+"\n"+ |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1453 "H1 antigen prediction(fliC):\t"+fliC_choice+"\n"+ |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1454 "H2 antigen prediction(fljB):\t"+fljB_choice+"\n"+ |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1455 "Predicted subspecies:\t"+subspecies+"\n"+ |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1456 "Predicted antigenic profile:\t"+predict_form+"\n"+ |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1457 "Predicted serotype:\t"+predict_sero+"\n"+ |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1458 note+contamination_report+star_line+claim+antigen_note+"\n")#+## |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1459 else: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1460 print("Output directory:\t"+make_dir+"\n"+ |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1461 "Input files:\t"+"\t".join(input_file)+"\n"+ |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1462 "O antigen prediction:\t"+O_choice+"\n"+ |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1463 "H1 antigen prediction(fliC):\t"+fliC_choice+"\n"+ |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1464 "H2 antigen prediction(fljB):\t"+fljB_choice+"\n"+ |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1465 "Predicted subspecies:\t"+subspecies+"\n"+ |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1466 "Predicted antigenic profile:\t"+predict_form+"\n"+ |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1467 "Predicted serotype:\t"+subspecies+' '+predict_form+"\n"+ # add serotype output for "N/A" prediction, subspecies |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1468 note+NA_note+contamination_report+star_line+claim+antigen_note+"\n") |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1469 else: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1470 print("Allele modes only support raw reads datatype, i.e. '-t 1 or 2 or 3'; please use '-m k'") |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1471 elif analysis_mode=="k": |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1472 #ex_dir = os.path.dirname(os.path.realpath(__file__)) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1473 ex_dir = os.path.abspath(os.path.join(os.path.dirname(os.path.dirname(__file__)),'seqsero2_db')) # ed_SL_09152019: change ex_dir for packaging |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1474 #output_mode = args.mode |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1475 for_fq,rev_fq=get_input_files(make_dir,input_file,data_type,dirpath) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1476 input_file = for_fq #-k will just use forward because not all reads were used |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1477 os.chdir(make_dir) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1478 f = open(ex_dir + '/antigens.pickle', 'rb') |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1479 lib_dict = pickle.load(f) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1480 f.close |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1481 input_Ks=get_input_K(input_file,lib_dict,data_type,k_size) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1482 O_dict,H_dict,Special_dict=get_kmer_dict(lib_dict,input_Ks) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1483 highest_O,highest_fliC,highest_fljB=call_O_and_H_type(O_dict,H_dict,Special_dict,make_dir) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1484 subspecies=judge_subspecies_Kmer(Special_dict) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1485 if subspecies=="IIb" or subspecies=="IIa": |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1486 subspecies="II" |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1487 predict_form,predict_sero,star,star_line,claim = seqsero_from_formula_to_serotypes( |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1488 highest_O.split('-')[1], highest_fliC, highest_fljB, Special_dict,subspecies) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1489 claim="" #no claim any more based on new output requirement |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1490 #if star_line+claim=="": #0413, new output style |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1491 # note="" |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1492 #else: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1493 # note="Note:" |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1494 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1495 ### ed_SL_11232019: add notes for missing antigen |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1496 if highest_O.split('-')[-1]=="": |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1497 O_choice="-" |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1498 else: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1499 O_choice=highest_O.split('-')[-1] |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1500 antigen_note,NA_note=check_antigens(subspecies,O_choice,highest_fliC,highest_fljB,NA_note) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1501 if sample_name: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1502 print ("Sample name:\t"+sample_name) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1503 ### |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1504 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1505 if clean_mode: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1506 subprocess.check_call("rm -rf ../"+make_dir,shell=True) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1507 make_dir="none-output-directory due to '-c' flag" |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1508 # ### ed_SL_05282019, fix the assignment issue of variable 'O_choice' using "-m k -c" |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1509 # if highest_O.split('-')[-1]=="": |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1510 # O_choice="-" |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1511 # else: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1512 # O_choice=highest_O.split('-')[-1] |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1513 # ### |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1514 else: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1515 # if highest_O.split('-')[-1]=="": |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1516 # O_choice="-" |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1517 # else: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1518 # O_choice=highest_O.split('-')[-1] |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1519 #print("Output_directory:"+make_dir+"\tInput_file:"+input_file+"\tPredicted subpecies:"+subspecies + '\tPredicted antigenic profile:' + predict_form + '\tPredicted serotype(s):' + predict_sero) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1520 new_file=open("SeqSero_result.txt","w") |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1521 #new_file.write("Output_directory:"+make_dir+"\nInput files:\t"+input_file+"\n"+"O antigen prediction:\t"+O_choice+"\n"+"H1 antigen prediction(fliC):\t"+highest_fliC+"\n"+"H2 antigen prediction(fljB):\t"+highest_fljB+"\n"+"Predicted antigenic profile:\t"+predict_form+"\n"+"Predicted subspecies:\t"+subspecies+"\n"+"Predicted serotype(s):\t"+predict_sero+star+"\n"+star+star_line+claim+"\n")#+## |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1522 ### ed_SL_01152020: add new output |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1523 tsv_file=open("SeqSero_result.tsv","w") |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1524 if ingore_header: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1525 pass |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1526 else: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1527 tsv_file.write("Sample name\tOutput directory\tInput files\tO antigen prediction\tH1 antigen prediction(fliC)\tH2 antigen prediction(fljB)\tPredicted subspecies\tPredicted antigenic profile\tPredicted serotype\tNote\n") |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1528 if sample_name: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1529 new_file.write("Sample name:\t"+sample_name+"\n") |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1530 tsv_file.write(sample_name+'\t') |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1531 else: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1532 tsv_file.write(input_file.split('/')[-1]+'\t') |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1533 ### |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1534 if "N/A" not in predict_sero: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1535 new_file.write("Output directory:\t"+make_dir+"\n"+ |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1536 "Input files:\t"+input_file+"\n"+ |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1537 "O antigen prediction:\t"+O_choice+"\n"+ |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1538 "H1 antigen prediction(fliC):\t"+highest_fliC+"\n"+ |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1539 "H2 antigen prediction(fljB):\t"+highest_fljB+"\n"+ |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1540 "Predicted subspecies:\t"+subspecies+"\n"+ |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1541 "Predicted antigenic profile:\t"+predict_form+"\n"+ |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1542 "Predicted serotype:\t"+predict_sero+"\n"+ |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1543 note+star_line+claim+antigen_note+"\n")#+## |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1544 tsv_file.write(make_dir+"\t"+input_file+"\t"+O_choice+"\t"+highest_fliC+"\t"+highest_fljB+"\t"+subspecies+"\t"+predict_form+"\t"+predict_sero+"\t"+star_line+claim+antigen_note+"\n") |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1545 else: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1546 #star_line=star_line.strip()+"\tNone such antigenic formula in KW.\n" |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1547 star_line = "" #changed for new output requirement, 04132019 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1548 new_file.write("Output directory:\t"+make_dir+"\n"+ |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1549 "Input files:\t"+input_file+"\n"+ |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1550 "O antigen prediction:\t"+O_choice+"\n"+ |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1551 "H1 antigen prediction(fliC):\t"+highest_fliC+"\n"+ |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1552 "H2 antigen prediction(fljB):\t"+highest_fljB+"\n"+ |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1553 "Predicted subspecies:\t"+subspecies+"\n"+ |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1554 "Predicted antigenic profile:\t"+predict_form+"\n"+ |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1555 "Predicted serotype:\t"+subspecies+' '+predict_form+"\n"+ # add serotype output for "N/A" prediction, subspecies |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1556 note+NA_note+star_line+claim+antigen_note+"\n")#+## |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1557 tsv_file.write(make_dir+"\t"+input_file+"\t"+O_choice+"\t"+highest_fliC+"\t"+highest_fljB+"\t"+subspecies+"\t"+predict_form+"\t"+subspecies+' '+predict_form+"\t"+NA_note+star_line+claim+antigen_note+"\n") |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1558 new_file.close() |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1559 tsv_file.close() |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1560 subprocess.call("rm *.fasta* *.fastq *.gz *.fq temp.txt *.sra 2> /dev/null",shell=True) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1561 if "N/A" not in predict_sero: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1562 print("Output directory:\t"+make_dir+"\n"+ |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1563 "Input files:\t"+input_file+"\n"+ |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1564 "O antigen prediction:\t"+O_choice+"\n"+ |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1565 "H1 antigen prediction(fliC):\t"+highest_fliC+"\n"+ |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1566 "H2 antigen prediction(fljB):\t"+highest_fljB+"\n"+ |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1567 "Predicted subspecies:\t"+subspecies+"\n"+ |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1568 "Predicted antigenic profile:\t"+predict_form+"\n"+ |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1569 "Predicted serotype:\t"+predict_sero+"\n"+ |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1570 note+star_line+claim+antigen_note+"\n")#+## |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1571 else: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1572 print("Output directory:\t"+make_dir+"\n"+ |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1573 "Input files:\t"+input_file+"\n"+ |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1574 "O antigen prediction:\t"+O_choice+"\n"+ |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1575 "H1 antigen prediction(fliC):\t"+highest_fliC+"\n"+ |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1576 "H2 antigen prediction(fljB):\t"+highest_fljB+"\n"+ |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1577 "Predicted subspecies:\t"+subspecies+"\n"+ |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1578 "Predicted antigenic profile:\t"+predict_form+"\n"+ |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1579 "Predicted serotype:\t"+subspecies+' '+predict_form+"\n"+ # add serotype output for "N/A" prediction, subspecies |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1580 note+NA_note+star_line+claim+antigen_note+"\n")#+## |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1581 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1582 if __name__ == '__main__': |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1583 main() |