annotate libs/mapping_and_assembly_hybrid.py @ 0:6275272ebcbc draft

planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
author cstrittmatter
date Thu, 21 Dec 2017 12:45:31 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
1 import os,sys,glob,time,itertools,subprocess
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
2 from Initial_Conditions import phase1
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
3 from Initial_Conditions import phase2
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
4 from Initial_Conditions import phaseO
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
5 from Initial_Conditions import sero
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
6 from distutils.version import LooseVersion
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
7
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
8
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
9
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
10
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
def xml_parse_score_comparision_seqsero(xmlfile):
    """Parse a BLAST XML report and rank every query/hit pair by score.

    For each alignment of each BLAST record, one entry is produced whose
    name is ``<stripped query>___<hit_def>``.  Its score is the sum of the
    ``bits`` of the alignment's HSPs and its identity value is the sum of
    ``identities / query_length`` over the same HSPs.

    Queries whose name contains "last" or "first" count every HSP; for all
    other queries, HSPs with ``align_length`` below 30 are ignored to
    filter out noise on the long alleles.

    Args:
        xmlfile: path of the BLAST XML output file.

    Returns:
        A list of ``(name, score, ids)`` tuples sorted by ``score`` in
        descending order (ties keep their order of appearance in the
        report).
    """
    # Imported lazily, as in the original, so that merely importing this
    # module does not require Biopython.
    from Bio.Blast import NCBIXML

    # NCBIXML.parse is lazy: materialise the records while the file is
    # still open, then let the context manager close it.
    with open(xmlfile) as xml_handle:
        records = list(NCBIXML.parse(xml_handle))

    names = []
    scores = []
    identities = []
    for record in records:
        is_head_or_tail = "last" in record.query or "first" in record.query
        for alignment in record.alignments:
            score = 0
            ids = 0
            names.append(record.query.strip() + "___" + alignment.hit_def)
            for hsp in alignment.hsps:
                # Head/tail probes keep every HSP; long alleles only keep
                # HSPs of at least 30 aligned positions.
                if is_head_or_tail or hsp.align_length >= 30:
                    score += hsp.bits
                    ids += float(hsp.identities) / record.query_length
            scores.append(score)
            identities.append(ids)

    return sorted(zip(names, scores, identities),
                  key=lambda entry: entry[1], reverse=True)
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
40
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
41
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
def Uniq(L, sort_on_fre="none"):
    """Return the distinct items of ``L`` and the number of times each occurs.

    Args:
        L: list of mutually orderable items.  NOTE: the list is sorted
            in place (behaviour kept from the original implementation).
        sort_on_fre: when left at ``"none"`` the result is ordered by item
            value; any other value orders the result by ascending
            frequency (ties broken by item value).

    Returns:
        ``(items, counts)`` where ``counts[i]`` is the number of
        occurrences of ``items[i]``.  Both are lists in the default mode
        and tuples when ``sort_on_fre`` is set.  An empty input gives empty
        sequences in either mode.
    """
    L.sort()
    unique = []
    count = []
    # After sorting, equal items are adjacent, so a single pass is enough
    # to both de-duplicate and count.
    for item in L:
        if unique and unique[-1] == item:
            count[-1] += 1
        else:
            unique.append(item)
            count.append(1)
    if sort_on_fre != "none":
        # Build the pairs explicitly instead of indexing into zip(): zip()
        # returns an iterator on Python 3 and unpacking an empty zip fails.
        ranked = sorted(zip(count, unique))
        unique = tuple(item for _, item in ranked)
        count = tuple(freq for freq, _ in ranked)
    return (unique, count)
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
58
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
59
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
def judge_fliC_or_fljB_from_head_tail_for_one_contig(nodes_vs_score_list):
    """Guess whether one contig is fliC or fljB from its head/tail hits.

    The scores of all entries whose name contains "fliC" are summed, and
    likewise for "fljB" (an entry containing both counts as fliC only).
    The larger total wins; a tie goes to fliC.

    Args:
        nodes_vs_score_list: iterable of sequences whose first element is
            the hit name and whose second element is its score.

    Returns:
        ``(role, difference)`` where ``role`` is ``"fliC"`` or ``"fljB"``
        and ``difference`` is the absolute gap between the two totals.  A
        small gap means the call is unreliable and the caller should fall
        back to the whole-contig BLAST score.
    """
    fliC_score = 0
    fljB_score = 0
    for entry in nodes_vs_score_list:
        if "fliC" in entry[0]:
            fliC_score += entry[1]
        elif "fljB" in entry[0]:
            fljB_score += entry[1]
    role = "fliC" if fliC_score >= fljB_score else "fljB"
    return (role, abs(fliC_score - fljB_score))
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
76
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
def judge_fliC_or_fljB_from_whole_contig_blast_score_ranking(node_name, Final_list_passed):
    """Take a contig's role from its first entry in the ranked hit list.

    Used when the head/tail score difference for the contig is too small
    to be trusted, and when the contig has no head or tail hit at all.

    Args:
        node_name: contig name; matched as a substring of each entry name.
        Final_list_passed: sequence of entries whose first element is the
            hit name.  Presumably already ordered best score first, as
            produced by the XML parser - confirm against the caller.

    Returns:
        The part of the first matching entry name before its first
        underscore (e.g. ``"fliC"``), or ``""`` when no entry matches.
    """
    for entry in Final_list_passed:
        if node_name in entry[0]:
            return entry[0].split("_")[0]
    return ""
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
86
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
87
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
def fliC_or_fljB_judge_from_head_tail_sequence(nodes_list, tail_head_list, Final_list_passed):
    """Assign a fliC/fljB role to every contig in ``nodes_list``.

    For each contig, the head/tail hits whose name contains the contig
    name are collected:

    * with 1 to 4 such hits, the role comes from the summed head/tail
      scores; if the two totals differ by less than 20 the call is
      considered unreliable and the whole-contig ranking is used instead;
    * with no hits (or more than 4), the whole-contig ranking is used
      directly.

    Finally, if exactly two contigs were judged and both received the same
    role (the most common error), every contig is re-judged from the
    whole-contig ranking alone.

    Args:
        nodes_list: contig names (the first value returned by ``Uniq`` in
            ``decide_contig_roles_for_H_antigen``).
        tail_head_list: head/tail entries, each with the hit name first
            and its score second.
        Final_list_passed: filtered whole-contig entries, hit name first.

    Returns:
        A list of ``(role, contig_name)`` tuples, one per contig, in the
        order of ``nodes_list``.  ``role`` is ``""`` when the ranking
        fallback finds no entry for the contig.
    """
    role_list = []
    for node in nodes_list:
        hits = [entry for entry in tail_head_list if node in entry[0]]
        if 1 <= len(hits) <= 4:
            role, diff = judge_fliC_or_fljB_from_head_tail_for_one_contig(hits)
            if diff < 20:
                # Too close to call from head/tail alone.
                role = judge_fliC_or_fljB_from_whole_contig_blast_score_ranking(node, Final_list_passed)
        else:
            # No usable head/tail evidence: take the best whole-contig match.
            role = judge_fliC_or_fljB_from_whole_contig_blast_score_ranking(node, Final_list_passed)
        role_list.append((role, node))

    if len(role_list) == 2 and role_list[0][0] == role_list[1][0]:
        # Two antigens assigned to the same phase: redo both from the
        # whole-contig score as a final test.
        role_list = [
            (judge_fliC_or_fljB_from_whole_contig_blast_score_ranking(node, Final_list_passed), node)
            for node in nodes_list
        ]
    return role_list
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
178
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
def decide_contig_roles_for_H_antigen(Final_list):
    """Decide which contig is fliC and which one is fljB.

    Args:
        Final_list: list of entries whose first element is the hit name
            and whose second element is the score.  The name is parsed
            positionally; it presumably looks like
            ``<allele>__<length>___NODE_<n>_length_<len>_cov_<cov>`` -
            confirm against the BLAST database naming.

    Returns:
        The list of ``(role, contig_name)`` tuples produced by
        ``fliC_or_fljB_judge_from_head_tail_sequence``.
    """
    def _passes(entry):
        # Keep an entry when the value after "_cov_" is at least 3.5 and
        # the score reaches either the number after the first "__" or the
        # fourth "_"-separated field of the text after "___".
        name, score = entry[0], entry[1]
        return (float(name.split("_cov_")[1]) >= 3.5
                and (score >= int(name.split("__")[1])
                     or score >= int(name.split("___")[1].split("_")[3])))

    Final_list_passed = [entry for entry in Final_list if _passes(entry)]

    # Contig names of the whole-allele "fl*" hits (head/tail probes excluded).
    nodes = [
        entry[0].split("___")[1].strip()
        for entry in Final_list_passed
        if entry[0].startswith("fl")
        and "last" not in entry[0]
        and "first" not in entry[0]
    ]
    node_names, _ = Uniq(nodes)

    # Head/tail probes are taken from the unfiltered list.
    tail_head_list = [
        entry for entry in Final_list
        if "last" in entry[0] or "first" in entry[0]
    ]
    return fliC_or_fljB_judge_from_head_tail_sequence(node_names, tail_head_list, Final_list_passed)
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
192
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
def Combine(b, c):
    """List ``c`` alone, then ``c`` merged with every non-empty subset of ``b``.

    Args:
        b: sequence of optional string elements.
        c: sequence of string elements present in every combination.

    Returns:
        A list of comma-joined strings.  The first is ``",".join(c)`` in
        its given order.  It is followed by one string per non-empty
        subset of ``b`` (subsets by increasing size, in
        ``itertools.combinations`` order), each holding the elements of
        ``c`` plus the subset, split on commas and sorted alphabetically.
    """
    fixed = list(c)
    combinations = [",".join(fixed)]
    for size in range(1, len(b) + 1):
        for subset in itertools.combinations(b, size):
            # Join then split so that elements which themselves contain
            # commas are broken into their parts before sorting.
            tokens = ",".join(fixed + list(subset)).split(",")
            tokens.sort()
            combinations.append(",".join(tokens))
    return combinations
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
211
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
def decide_O_type_and_get_special_genes(Final_list):
    """Choose the O-antigen type from a list of hits and collect special genes.

    NOTE(review): the nesting of this function was rebuilt from a listing
    that had lost its indentation -- confirm it against the repository copy.

    Final_list -- sequence of hits. hit[0] is a name string shaped like
        "<gene>__<gene length>___<contig>", where <contig> is split on "_"
        (its 4th field is read as an integer) and carries "length_<n>" and
        "_cov_<x>" fields. hit[1] is a number compared against those lengths
        (presumably an alignment score -- verify against the caller). hit[2]
        is a factor multiplied with the gene length in the merge test.

    Returns the tuple (O_choice, O_nodes_list, special_genes, final_O):
        O_choice      -- "-" when no O hit was kept, otherwise the chosen O
                         type; stays "?" if no rule assigned a value.
        O_nodes_list  -- contig field of every kept O hit, except hits whose
                         name contains "O-1,3,19_not_in_3,10__130".
        special_genes -- passed hits whose name starts with neither "O-"
                         nor "fl".
        final_O       -- the O hits the decision was based on.
    """
    def gene_length(name):
        # Integer found between the first "__" and the "___" separator.
        return int(name.split("__")[1])

    def contig_of(name):
        # Field that follows the first "___" in the hit name.
        return name.split("___")[1]

    #decide O based on Final_list
    O_choice = "?"
    O_list = []
    special_genes = []

    # Keep hits with coverage >= 3.5 whose hit[1] reaches either the gene
    # length or the 4th "_"-separated field of the contig.
    Final_list_passed = [
        hit for hit in Final_list
        if float(hit[0].split("_cov_")[1]) >= 3.5
        and (hit[1] >= gene_length(hit[0])
             or hit[1] >= int(contig_of(hit[0]).split("_")[3]))
    ]

    nodes = []
    for hit in Final_list_passed:
        if hit[0].startswith("O-"):
            nodes.append(contig_of(hit[0]).strip())
        elif not hit[0].startswith("fl"):
            special_genes.append(hit)
    #print "special_genes:",special_genes
    # Uniq is defined elsewhere in this file; only its first return value
    # (used below as the list of contigs) is needed here.
    contigs, _unused = Uniq(nodes)
    #print "potential O antigen contig",contigs

    # For every contig keep the first passed O hit that mentions it.
    final_O = []
    O_nodes_list = []
    for contig in contigs:
        for hit in Final_list_passed:
            if contig in hit[0] and hit[0].startswith("O-"):
                final_O.append(hit)
                break

    ### O contig has the problem of two genes on same contig, so do additional test
    potenial_new_gene = ""
    for hit in final_O:
        pointer = 0  # stays 0 unless this hit may share its contig with another gene
        # O-1,3,19_not_in_3,10 is not considered: too short compared with others.
        # Gene length << contig length; 800 is the allowance for flanking
        # regions (the original comment suggests 300*2, or 400*2 to be safe).
        if ("O-1,3,19_not_in_3,10" not in hit[0]
                and gene_length(hit[0]) + 800
                <= int(hit[0].split("length_")[1].split("_")[0])):
            pointer = contig_of(hit[0]).strip()  # store the contig name
            print(pointer)
        if pointer != 0:  # it has a potential merge event
            for other in Final_list:
                # Relatively strict filter; a hit that passes is a second
                # gene on the same contig and is added to final_O below.
                if (pointer in other[0] and other not in final_O
                        and (other[1] >= gene_length(other[0]) * 1.5
                             or (other[1] >= gene_length(other[0]) * other[2]
                                 and other[1] >= 400))):
                    potenial_new_gene = other
                    print(potenial_new_gene)
                    break
    if potenial_new_gene != "":
        print("two differnt genes in same contig, fix it for O antigen")
        final_O.append(potenial_new_gene)
    ### end of the two genes on same contig test

    if len(final_O) == 0:
        #print "$$$No Otype, due to no hit"#may need to be changed
        O_choice = "-"
    else:
        for hit in final_O:
            O_list.append(hit[0].split("__")[0])
            # O-1,3,19_not_in_3,10 is too small, which may affect further analysis
            if "O-1,3,19_not_in_3,10__130" not in hit[0]:
                O_nodes_list.append(contig_of(hit[0]))
        ### special test for O9,46 and O3,10 family
        if "O-9,46_wbaV" in O_list:
            if "O-9,46_wzy" in O_list:
                O_choice = "O-9,46"
            elif "O-9,46,27_partial_wzy" in O_list:
                O_choice = "O-9,46,27"
            else:
                # Default to O-9; switch to O-2 only when the tyr-O-2 hit
                # strictly beats the tyr-O-9 hit. A tie keeps O-9.
                O_choice = "O-9"
                O2 = 0
                O9 = 0
                for gene in special_genes:
                    if "tyr-O-9" in gene[0]:
                        O9 = gene[1]
                    elif "tyr-O-2" in gene[0]:
                        O2 = gene[1]
                if O2 > O9:
                    O_choice = "O-2"
        elif ("O-3,10_wzx" in O_list) and ("O-9,46_wzy" in O_list):
            if "O-3,10_not_in_1,3,19" in O_list:
                O_choice = "O-3,10"
            else:
                O_choice = "O-1,3,19"
        ### end of special test for O9,46 and O3,10 family
        else:
            try:
                # Take the name prefix (text before the first "_") of the
                # hit with the largest hit[1]; later hits win ties.
                max_score = 0
                for hit in final_O:
                    if hit[1] >= max_score:
                        max_score = hit[1]
                        O_choice = hit[0].split("_")[0]
                # When the winner is O-1,3,19 the second entry of final_O is
                # used instead; with a single entry the choice is kept.
                if O_choice == "O-1,3,19" and len(final_O) > 1:
                    O_choice = final_O[1][0].split("_")[0]
            except Exception:
                # Deliberate best effort: on malformed hits keep whatever
                # O_choice holds at this point instead of aborting the run.
                pass
    #print "O:",O_choice,O_nodes_list
    return O_choice, O_nodes_list, special_genes, final_O
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
309
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
310 def seqsero_from_formula_to_serotypes(Otype,fliC,fljB,special_gene_list):
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
311 #like test_output_06012017.txt
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
312 #can add more varialbles like sdf-type, sub-species-type in future (we can conclude it into a special-gene-list)
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
313 from Initial_Conditions import phase1
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
314 from Initial_Conditions import phase2
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
315 from Initial_Conditions import phaseO
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
316 from Initial_Conditions import sero
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
317 seronames=[]
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
318 for i in range(len(phase1)):
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
319 fliC_combine=[]
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
320 fljB_combine=[]
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
321 if phaseO[i]==Otype:
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
322 ### for fliC, detect every possible combinations to avoid the effect of "["
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
323 if phase1[i].count("[")==0:
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
324 fliC_combine.append(phase1[i])
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
325 elif phase1[i].count("[")>=1:
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
326 c=[]
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
327 b=[]
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
328 if phase1[i][0]=="[" and phase1[i][-1]=="]" and phase1[i].count("[")==1:
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
329 content=phase1[i].replace("[","").replace("]","")
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
330 fliC_combine.append(content)
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
331 fliC_combine.append("-")
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
332 else:
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
333 for x in phase1[i].split(","):
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
334 if "[" in x:
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
335 b.append(x.replace("[","").replace("]",""))
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
336 else:
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
337 c.append(x)
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
338 fliC_combine=Combine(b,c) #Combine will offer every possible combinations of the formula, like f,[g],t: f,t f,g,t
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
339 ### end of fliC "[" detect
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
340 ### for fljB, detect every possible combinations to avoid the effect of "["
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
341 if phase2[i].count("[")==0:
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
342 fljB_combine.append(phase2[i])
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
343 elif phase2[i].count("[")>=1:
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
344 d=[]
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
345 e=[]
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
346 if phase2[i][0]=="[" and phase2[i][-1]=="]" and phase2[i].count("[")==1:
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
347 content=phase2[i].replace("[","").replace("]","")
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
348 fljB_combine.append(content)
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
349 fljB_combine.append("-")
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
350 else:
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
351 for x in phase2[i].split(","):
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
352 if "[" in x:
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
353 d.append(x.replace("[","").replace("]",""))
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
354 else:
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
355 e.append(x)
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
356 fljB_combine=Combine(d,e)
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
357 ### end of fljB "[" detect
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
358 new_fliC=fliC.split(",") #because some antigen like r,[i] not follow alphabetical order, so use this one to judge and can avoid missings
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
359 new_fliC.sort()
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
360 new_fliC=",".join(new_fliC)
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
361 new_fljB=fljB.split(",")
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
362 new_fljB.sort()
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
363 new_fljB=",".join(new_fljB)
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
364 if (new_fliC in fliC_combine or fliC in fliC_combine) and (new_fljB in fljB_combine or fljB in fljB_combine):
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
365 seronames.append(sero[i])
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
366 #analyze seronames
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
367 if len(seronames)==0:
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
368 seronames=["N/A (The predicted antigenic profile does not exist in the White-Kauffmann-Le Minor scheme)"]
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
369 star=""
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
370 star_line=""
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
371 if len(seronames)>1:#there are two possible predictions for serotypes
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
372 star="*"
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
373 star_line="The predicted serotypes share the same general formula:\t"+Otype+":"+fliC+":"+fljB+"\n"##
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
374 print "\n"
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
375 predict_form=Otype+":"+fliC+":"+fljB#
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
376 predict_sero=(" or ").join(seronames)
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
377 ###special test for Enteritidis
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
378 if predict_form=="9:g,m:-":
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
379 sdf="-"
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
380 for x in special_gene_list:
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
381 if x[0].startswith("sdf"):
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
382 sdf="+"
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
383 predict_form=predict_form+"\nSdf prediction:"+sdf
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
384 if sdf=="-":
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
385 star="*"
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
386 star_line="Additional characterization is necessary to assign a serotype to this strain. Commonly circulating strains of serotype Enteritidis are sdf+, although sdf- strains of serotype Enteritidis are known to exist. Serotype Gallinarum is typically sdf- but should be quite rare. Sdf- strains of serotype Enteritidis and serotype Gallinarum can be differentiated by phenotypic profile or genetic criteria.\n"#+##
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
387 predict_sero="See comments below"
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
388 ###end of special test for Enteritidis
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
389 elif predict_form=="4:i:-":
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
390 predict_sero="potential monophasic variant of Typhimurium"
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
391 elif predict_form=="4:r:-":
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
392 predict_sero="potential monophasic variant of Heidelberg"
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
393 elif predict_form=="4:b:-":
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
394 predict_sero="potential monophasic variant of Paratyphi B"
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
395 elif predict_form=="8:e,h:1,2":
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
396 predict_sero="Newport"
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
397 star="*"
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
398 star_line="Serotype Bardo shares the same antigenic profile with Newport, but Bardo is exceedingly rare."
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
399 claim="The serotype(s) is/are the only serotype(s) with the indicated antigenic profile currently recognized in the Kauffmann White Scheme. New serotypes can emerge and the possibility exists that this antigenic profile may emerge in a different subspecies. Identification of strains to the subspecies level should accompany serotype determination; the same antigenic profile in different subspecies is considered different serotypes."##
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
400 if "N/A" in predict_sero:
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
401 claim=""
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
402 if "Typhimurium" in predict_sero or predict_form=="4:i:-":
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
403 normal=0
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
404 mutation=0
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
405 for x in special_gene_list:
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
406 if "oafA-O-4_full" in x[0]:
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
407 normal=x[1]
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
408 elif "oafA-O-4_5-" in x[0]:
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
409 mutation=x[1]
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
410 if normal>mutation:
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
411 #print "$$$Typhimurium"
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
412 pass
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
413 elif normal<mutation:
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
414 predict_sero=predict_sero.strip()+"(O5-)"
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
415 star="*"#
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
416 star_line="Detected the deletion of O5-."
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
417 #print "$$$Typhimurium_O5-"
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
418 else:
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
419 #print "$$$Typhimurium, even no 7 bases difference"
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
420 pass
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
421 return predict_form,predict_sero,star,star_line,claim
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
422
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
def main():
    """Run the hybrid mapping + assembly serotyping pipeline on paired reads.

    Positional command-line arguments (read from ``sys.argv``):
      1. database      -- FASTA used as the bwa reference and as the blastn query
      2. mapping_mode  -- "mem" runs ``bwa mem``; "sam" runs ``bwa aln`` twice
                          followed by ``bwa sampe``; any other value skips mapping
      3. threads       -- thread count as a string (spades.py is capped at 4)
      4. for_fq        -- forward reads
      5. rev_fq        -- reverse reads

    Steps: index the database, map the reads, filter the alignments with
    ``samtools view -F 4`` and name-sort them, convert back to FASTQ with
    bamToFastq, assemble with ``spades.py --careful``, blast the database
    against the assembled contigs, then derive the O, H1 (fliC) and H2 (fljB)
    calls and the predicted serotype.

    Side effects: creates intermediate files next to the input reads, appends
    to ``data_log.txt`` and ``SeqSero_hybrid_assembly_log.txt``, overwrites
    ``Seqsero_result.txt`` in the working directory and echoes it to stdout.
    Returns None.

    Every ``print(...)`` below takes exactly one string so that it produces the
    same output whether parsed as a Python 2 statement or a Python 3 call.
    """
    database = sys.argv[1]  # used to extract reads
    mapping_mode = sys.argv[2]  # "mem" or "sam" (bwa aln + sampe)
    threads = sys.argv[3]
    for_fq = sys.argv[4]
    rev_fq = sys.argv[5]
    current_time = time.strftime("%Y_%m_%d_%H_%M_%S", time.localtime())

    # Intermediate file names are all derived from the input read names.
    sam = for_fq + ".sam"
    bam = for_fq + ".bam"
    sorted_bam = for_fq + "_sorted.bam"
    mapped_fq1 = for_fq + "_mapped.fq"
    mapped_fq2 = rev_fq + "_mapped.fq"
    combined_fq = for_fq + "_combined.fq"
    for_sai = for_fq + ".sai"
    rev_sai = rev_fq + ".sai"

    print("building database...")
    os.system("bwa index " + database + " 2>> data_log.txt ")

    print("mapping...")
    if mapping_mode == "mem":
        os.system("bwa mem -t " + threads + " " + database + " " + for_fq + " " + rev_fq + " > " + sam + " 2>> data_log.txt")
    elif mapping_mode == "sam":
        os.system("bwa aln -t " + threads + " " + database + " " + for_fq + " > " + for_sai + " 2>> data_log.txt")
        os.system("bwa aln -t " + threads + " " + database + " " + rev_fq + " > " + rev_sai + " 2>> data_log.txt")
        os.system("bwa sampe " + database + " " + for_sai + " " + rev_sai + " " + for_fq + " " + rev_fq + " > " + sam + " 2>> data_log.txt")
    # Filter the alignments into a BAM, then write the filtered set back over the SAM.
    os.system("samtools view -@ " + threads + " -F 4 -Sbh " + sam + " > " + bam)
    os.system("samtools view -@ " + threads + " -h -o " + sam + " " + bam)

    ### check the version of samtools, then use different commands
    # Running samtools with no arguments prints its usage (including the
    # "Version:" line) on stderr, which is what gets parsed here.
    samtools_version = subprocess.Popen(["samtools"], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    _out, err = samtools_version.communicate()
    version = err.split("ersion:")[1].strip().split(" ")[0].strip()
    print("check samtools version: " + version)
    if LooseVersion(version) <= LooseVersion("1.2"):
        # Older command form: the last argument is an output prefix.
        os.system("samtools sort -@ " + threads + " -n " + bam + " " + for_fq + "_sorted")
    else:
        os.system("samtools sort -@ " + threads + " -n " + bam + " >" + sorted_bam)
    ### end of samtools version check and its analysis

    os.system("bamToFastq -i " + sorted_bam + " -fq " + combined_fq)
    os.system("bamToFastq -i " + sorted_bam + " -fq " + mapped_fq1 + " -fq2 " + mapped_fq2 + " 2>> data_log.txt")  # 2> /dev/null if want no output

    outdir = current_time + "_temp"
    print("assembling...")
    # spades.py never gets more than 4 threads, whatever was requested.
    t = "4" if int(threads) > 4 else threads
    os.system("spades.py --careful --pe1-s " + combined_fq + " --pe1-1 " + mapped_fq1 + " --pe1-2 " + mapped_fq2 + " -t " + t + " -o " + outdir + " >> data_log.txt 2>&1")
    new_fasta = for_fq + "_" + database + "_" + mapping_mode + ".fasta"
    os.system("mv " + outdir + "/contigs.fasta " + new_fasta + " 2> /dev/null")
    os.system("rm -rf " + outdir + " 2> /dev/null")

    ### begin blast
    print("blasting...")
    print("\n")
    xmlfile = for_fq + "-extracted_vs_" + database + "_" + mapping_mode + ".xml"
    # Tool chatter goes to data_log.txt so it does not interleave with our own output.
    os.system('makeblastdb -in ' + new_fasta + ' -out ' + new_fasta + '_db ' + '-dbtype nucl >> data_log.txt 2>&1')
    os.system("blastn -word_size 10 -query " + database + " -db " + new_fasta + "_db -out " + xmlfile + " -outfmt 5 >> data_log.txt 2>&1")
    Final_list = xml_parse_score_comparision_seqsero(xmlfile)
    # Keep hits whose "_cov_" value is at least 3.5 and whose score (x[1])
    # reaches either the integer after the first "__" or the 4th "_"-separated
    # field after "___" (presumably allele length and contig length -- confirm
    # against xml_parse_score_comparision_seqsero).
    Final_list_passed = [
        x for x in Final_list
        if float(x[0].split("_cov_")[1]) >= 3.5
        and (x[1] >= int(x[0].split("__")[1]) or x[1] >= int(x[0].split("___")[1].split("_")[3]))
    ]

    fliC_choice = "-"
    fljB_choice = "-"
    fliC_contig = "NA"
    fljB_contig = "NA"
    fliC_length = 0  # can be changed to coverage in future
    fljB_length = 0  # can be changed to coverage in future
    # Decide the O antigen type; also returns the special-gene list used for
    # further identification. No need to decide an O contig for now.
    O_choice, O_nodes, special_gene_list, O_nodes_roles = decide_O_type_and_get_special_genes(Final_list)
    O_choice = O_choice.split("-")[-1].strip()
    H_contig_roles = decide_contig_roles_for_H_antigen(Final_list)  # decide whether each H antigen contig is fliC or fljB

    # The log is opened in append mode; "with" guarantees it is closed even if
    # one of the hit tuples is malformed and raises.
    with open("SeqSero_hybrid_assembly_log.txt", "a") as log_file:
        print("O_contigs:")
        log_file.write("O_contigs:\n")
        for x in O_nodes_roles:
            if "O-1,3,19_not_in_3,10" not in x[0]:  # O-1,3,19_not_in_3,10 is just a small size marker
                contig = x[0].split("___")[-1]
                allele = x[0].split("__")[0]
                identity = str(round(x[2] * 100, 2)) + "%"
                print(" ".join([contig, allele, "blast score:", str(x[1]), "identity%:", identity]))
                log_file.write(contig + " " + allele + " " + "blast score: " + str(x[1]) + "identity%:" + identity + "\n")

        print("H_contigs:")
        log_file.write("H_contigs:\n")
        for i, role in enumerate(H_contig_roles):
            for hit in Final_list_passed:
                if role[1] in hit[0] and hit[0].startswith(role[0]):
                    if "first" in hit[0] or "last" in hit[0]:
                        # Final filter for fliC vs fljB: a "first"/"last" allele
                        # cannot be the prediction, so re-scan for any hit on
                        # this contig. It may well be a third-phase allele, so
                        # it is not forced to be fliC or fljB.
                        for other in Final_list_passed:
                            if role[1] in other[0]:
                                identity = str(round(other[2] * 100, 2)) + "%"
                                print(" ".join([role[1], "can't_decide_fliC_or_fljB", other[0].split("_")[1], "blast_score:", str(other[1]), "identity%:", identity]))
                                log_file.write(role[1] + " " + role[0] + " " + other[0].split("_")[1] + " " + "blast_score: " + str(other[1]) + " identity%:" + identity + "\n")
                                H_contig_roles[i] = "can't decide fliC or fljB, may be third phase"
                                break
                    else:
                        identity = str(round(hit[2] * 100, 2)) + "%"
                        print(" ".join([role[1], role[0], hit[0].split("_")[1], "blast_score:", str(hit[1]), "identity%:", identity]))
                        log_file.write(role[1] + " " + role[0] + " " + hit[0].split("_")[1] + " " + "blast_score: " + str(hit[1]) + " identity%:" + identity + "\n")
                    # NOTE(review): nesting was reconstructed from a view that
                    # lost indentation; this break is assumed to sit at the
                    # if/else level (stop at the first matching hit for the
                    # contig) -- confirm against the repository copy.
                    break

    for x in H_contig_roles:
        # If there are multiple choices, temporarily select the longest contig;
        # contigs already used for the O antigen are excluded.
        if "fliC" == x[0] and int(x[1].split("_")[3]) >= fliC_length and x[1] not in O_nodes:
            fliC_contig = x[1]
            fliC_length = int(x[1].split("_")[3])
        elif "fljB" == x[0] and int(x[1].split("_")[3]) >= fljB_length and x[1] not in O_nodes:
            fljB_contig = x[1]
            fljB_length = int(x[1].split("_")[3])

    # Take the first passing hit on each chosen contig as the antigen call.
    for x in Final_list_passed:
        if fliC_choice == "-" and "fliC_" in x[0] and fliC_contig in x[0]:
            fliC_choice = x[0].split("_")[1]
        elif fljB_choice == "-" and "fljB_" in x[0] and fljB_contig in x[0]:
            fljB_choice = x[0].split("_")[1]
        elif fliC_choice != "-" and fljB_choice != "-":
            break

    print("\n")
    print("SeqSero Input files: " + for_fq + " " + rev_fq)
    print("Most possible O antigen: " + O_choice)
    print("Most possible H1 antigen: " + fliC_choice)
    print("Most possible H2 antigen: " + fljB_choice)

    ### output
    predict_form, predict_sero, star, star_line, claim = seqsero_from_formula_to_serotypes(O_choice, fliC_choice, fljB_choice, special_gene_list)
    # The file must be closed (flushed) before "cat" reads it back.
    with open("Seqsero_result.txt", "w") as new_file:
        new_file.write("Input files:\t" + for_fq + " " + rev_fq + "\n" + "O antigen prediction:\t" + "O-" + O_choice + "\n" + "H1 antigen prediction(fliC):\t" + fliC_choice + "\n" + "H2 antigen prediction(fljB):\t" + fljB_choice + "\n" + "Predicted antigenic profile:\t" + predict_form + "\n" + "Predicted serotype(s):\t" + predict_sero + star + "\n" + star + star_line + claim + "\n")
    os.system("cat Seqsero_result.txt")
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
544
6275272ebcbc planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff changeset
# Script entry point: run the pipeline only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()