Mercurial > repos > cstrittmatter > seqsero_v2
annotate libs/mapping_and_assembly_hybrid.py @ 0:6275272ebcbc draft
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
author | cstrittmatter |
---|---|
date | Thu, 21 Dec 2017 12:45:31 -0500 |
parents | |
children |
rev | line source |
---|---|
0
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
1 import os,sys,glob,time,itertools,subprocess |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
2 from Initial_Conditions import phase1 |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
3 from Initial_Conditions import phase2 |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
4 from Initial_Conditions import phaseO |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
5 from Initial_Conditions import sero |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
6 from distutils.version import LooseVersion |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
7 |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
8 |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
9 |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
10 |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
def xml_parse_score_comparision_seqsero(xmlfile):
    """Summarise a BLAST XML report as (name, score, identity) tuples.

    For every alignment of every query record, one tuple is produced:

    * name  -- ``query.strip() + "___" + hit_def``
    * score -- sum of the bit scores of the HSPs that are counted
    * ids   -- sum of ``identities / query_length`` over the same HSPs

    Queries whose name contains "last" or "first" count every HSP; for all
    other queries only HSPs with ``align_length >= 30`` are counted (the
    original comment: "for the long alleles, filter noise parts").

    Parameters:
        xmlfile: path of the BLAST XML output file.

    Returns:
        A list of the tuples described above, sorted by score, highest first.
        Ties keep the order in which they appear in the report.
    """
    #used to do seqsero xml analysis
    from Bio.Blast import NCBIXML
    # NCBIXML.parse is lazy, so every record is materialised before the file
    # is closed; the "with" block guarantees the handle is released.
    with open(xmlfile) as xml_handle:
        records = list(NCBIXML.parse(xml_handle))
    names = []
    scores = []
    identities = []
    for record in records:
        count_every_hsp = "last" in record.query or "first" in record.query
        for alignment in record.alignments:
            score = 0
            ids = 0
            names.append(record.query.strip() + "___" + alignment.hit_def)
            for hsp in alignment.hsps:
                #for the long alleles, filter noise parts
                if count_every_hsp or hsp.align_length >= 30:
                    score += hsp.bits
                    ids += float(hsp.identities) / record.query_length
            scores.append(score)
            identities.append(ids)
    Final_list = sorted(zip(names, scores, identities), key=lambda d: d[1], reverse=True)
    return Final_list
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
40 |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
41 |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
def Uniq(L,sort_on_fre="none"): #return the uniq list and the count number
    """Return the distinct values of L and how often each one occurs.

    Parameters:
        L: list of mutually comparable values. NOTE: it is sorted in place,
            exactly as the original implementation did, so the caller's list
            is reordered.
        sort_on_fre: when left at "none" the distinct values are returned in
            ascending value order; any other value orders them by ascending
            count (ties broken by value).

    Returns:
        ``(values, counts)`` with ``counts[i]`` the number of occurrences of
        ``values[i]``. Both are lists by default and tuples when
        ``sort_on_fre`` is set. An empty input always gives ``([], [])``.
    """
    L.sort()
    values = []
    count = []
    # After sorting, equal items are adjacent, so one pass collects both the
    # distinct values and their frequencies.
    for value, run in itertools.groupby(L):
        values.append(value)
        count.append(sum(1 for _ in run))
    if sort_on_fre != "none" and values:
        # zip() is an iterator on Python 3, so unpack instead of indexing it.
        count, values = zip(*sorted(zip(count, values)))
    return (values, count)
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
58 |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
59 |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
def judge_fliC_or_fljB_from_head_tail_for_one_contig(nodes_vs_score_list):
    """Pick fliC or fljB for one contig from its head/tail hit scores.

    Each entry is indexed as ``entry[0]`` (hit name) and ``entry[1]`` (score).
    Scores of entries whose name contains "fliC" are summed into one total;
    the remaining entries whose name contains "fljB" are summed into another.

    Returns:
        ``(role, difference)`` where role is "fliC" when its total is at
        least the fljB total (so ties and empty input give "fliC"), otherwise
        "fljB"; difference is the absolute gap between the two totals. A small
        gap means the call is unreliable and the caller should fall back to
        the whole-contig BLAST score.
    """
    totals = {"fliC": 0, "fljB": 0}
    for entry in nodes_vs_score_list:
        label = entry[0]
        # "fliC" is checked first, so a name holding both only counts once.
        for gene in ("fliC", "fljB"):
            if gene in label:
                totals[gene] += entry[1]
                break
    gap = abs(totals["fliC"] - totals["fljB"])
    winner = "fliC" if totals["fliC"] >= totals["fljB"] else "fljB"
    return (winner, gap)
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
76 |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
def judge_fliC_or_fljB_from_whole_contig_blast_score_ranking(node_name,Final_list_passed):
    """Name the role of a contig from the first ranked hit that mentions it.

    Used when the head/tail score difference is too small to trust, and when
    no head or tail hit exists for the contig.

    Parameters:
        node_name: contig name, matched as a substring of each hit name.
        Final_list_passed: ranked hits; only ``hit[0]`` (the name) is read.

    Returns:
        The text before the first "_" of the first hit name containing
        ``node_name``, or "" when no hit contains it.
    """
    matching_roles = (
        hit[0].split("_")[0] for hit in Final_list_passed if node_name in hit[0]
    )
    # Only the first match matters; the generator stops there.
    return next(matching_roles, "")
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
86 |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
87 |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
def fliC_or_fljB_judge_from_head_tail_sequence(nodes_list,tail_head_list,Final_list_passed,score_diff_cutoff=20):
    """Assign a role (fliC or fljB) to every contig in nodes_list.

    For each contig, the head/tail hits whose name contains the contig name
    are collected. With one to four such hits the role comes from their
    accumulated scores; when the score gap is below ``score_diff_cutoff`` the
    call is considered unreliable and the whole-contig BLAST ranking decides
    instead. With no hits (or more than four) the whole-contig ranking is
    used directly.

    If exactly two contigs end up with the same role (described in the
    original as the most common error: two antigens assigned to the same
    phase), every contig is re-judged from the whole-contig ranking alone.

    Parameters:
        nodes_list: contig names (the first value returned by ``Uniq(nodes)``
            in ``decide_contig_roles_for_H_antigen``).
        tail_head_list: hits for the "first"/"last" probe sequences; each
            item is indexed as ``item[0]`` (name) and ``item[1]`` (score).
        Final_list_passed: ranked whole-contig hits that passed filtering.
        score_diff_cutoff: minimum head/tail score gap that is trusted;
            defaults to 20, the value previously hard-coded.

    Returns:
        A list of ``(role, contig_name)`` tuples. role may be "" when the
        whole-contig ranking has no hit for that contig.
    """
    role_list = []
    for node in nodes_list:
        probe_hits = [hit for hit in tail_head_list if node in hit[0]]
        if 1 <= len(probe_hits) <= 4:
            role, diff = judge_fliC_or_fljB_from_head_tail_for_one_contig(probe_hits)
            if diff < score_diff_cutoff:
                role = judge_fliC_or_fljB_from_whole_contig_blast_score_ranking(node, Final_list_passed)
        else:
            #use Final_list_passed best match
            role = judge_fliC_or_fljB_from_whole_contig_blast_score_ranking(node, Final_list_passed)
        role_list.append((role, node))
    if len(role_list) == 2 and role_list[0][0] == role_list[1][0]:
        #just use score to do a final test
        role_list = [
            (judge_fliC_or_fljB_from_whole_contig_blast_score_ranking(node, Final_list_passed), node)
            for node in nodes_list
        ]
    return role_list
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
178 |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
def decide_contig_roles_for_H_antigen(Final_list):
    """Decide which contig is fliC and which one is fljB.

    Parameters:
        Final_list: ranked hits, each indexed as ``hit[0]`` (name) and
            ``hit[1]`` (score). The name is parsed with "_cov_", "__" and
            "___" as separators -- presumably
            ``<allele>__<length>___NODE_<n>_length_<len>_cov_<cov>``;
            confirm against the BLAST database and assembler naming.

    Returns:
        The ``(role, contig_name)`` list produced by
        ``fliC_or_fljB_judge_from_head_tail_sequence``.
    """
    # Keep hits with coverage >= 3.5 whose score reaches either the number
    # after the first "__" or the 4th "_" field after "___".
    Final_list_passed = []
    for hit in Final_list:
        label = hit[0]
        if float(label.split("_cov_")[1]) >= 3.5:
            if hit[1] >= int(label.split("__")[1]) or hit[1] >= int(label.split("___")[1].split("_")[3]):
                Final_list_passed.append(hit)
    # Contig names of the passed whole-allele "fl*" hits (head/tail probes excluded).
    nodes = [
        hit[0].split("___")[1].strip()
        for hit in Final_list_passed
        if hit[0].startswith("fl") and "last" not in hit[0] and "first" not in hit[0]
    ]
    node_names, _counts = Uniq(nodes)
    # Head/tail probe hits are taken from the unfiltered list.
    tail_head_list = [hit for hit in Final_list if "last" in hit[0] or "first" in hit[0]]
    return fliC_or_fljB_judge_from_head_tail_sequence(node_names, tail_head_list, Final_list_passed)
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
192 |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
def Combine(b,c):
    """Build comma-joined combinations of c with every non-empty subset of b.

    Parameters:
        b: optional items; every combination of 1..len(b) of them is used,
            in the order produced by ``itertools.combinations``.
        c: items present in every result.

    Returns:
        A list of strings. The first is ``",".join(c)`` as given (not
        sorted). Each following string is c plus one subset of b, split on
        "," and re-joined in sorted order.
    """
    results = [",".join(c)]
    subsets = []
    for size in range(1, len(b) + 1):
        subsets.extend(",".join(picked) for picked in itertools.combinations(b, size))
    for subset in subsets:
        # Join then split so items that themselves contain commas are
        # flattened before sorting.
        pieces = ",".join(list(c) + [subset]).split(",")
        results.append(",".join(sorted(pieces)))
    return results
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
211 |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
def decide_O_type_and_get_special_genes(Final_list):
    """Decide the O antigen type from Final_list and collect the special-gene hits.

    Every entry is read as entry[0] = hit title and entry[1] = score; entry[2]
    is only used as a multiplier in the merged-contig filter (its meaning is
    not visible here -- TODO confirm against the caller).  Titles are parsed
    as "<gene>__<gene length>___<contig>", where the contig part carries
    "length_<n>" and "_cov_<coverage>" fields.

    Returns a 4-tuple (O_choice, O_nodes_list, special_genes, final_O):
      O_choice      -- "-" when no O hit survived the filter, otherwise the
                       chosen O type ("?" if nothing could be chosen)
      O_nodes_list  -- contig part of every hit in final_O, except hits whose
                       title contains "O-1,3,19_not_in_3,10__130"
      special_genes -- passed hits whose title starts with neither "O-" nor "fl"
      final_O       -- the O hits the decision was based on
    """
    O_choice="?"
    special_genes=[]
    # keep hits with coverage >= 3.5 whose score reaches either the gene length
    # or the 4th "_"-separated field of the contig name
    Final_list_passed=[
        x for x in Final_list
        if float(x[0].split("_cov_")[1])>=3.5
        and (x[1]>=int(x[0].split("__")[1])
             or x[1]>=int(x[0].split("___")[1].split("_")[3]))
    ]
    nodes=[]
    for x in Final_list_passed:
        if x[0].startswith("O-"):
            nodes.append(x[0].split("___")[1].strip())
        elif not x[0].startswith("fl"):
            special_genes.append(x)
    c,_=Uniq(nodes)  # only the first returned value (the contig list) is used
    final_O=[]
    O_nodes_list=[]
    for x in c:
        # take the first passed O hit that mentions this contig
        for y in Final_list_passed:
            if x in y[0] and y[0].startswith("O-"):
                final_O.append(y)
                break
    ### O contig has the problem of two genes on same contig, so do additional test
    potenial_new_gene=""
    for x in final_O:
        # O-1,3,19_not_in_3,10 is not considered: too short compared with others
        if "O-1,3,19_not_in_3,10" in x[0]:
            continue
        gene_length=int(x[0].split("__")[1].split("___")[0])
        contig_length=int(x[0].split("length_")[1].split("_")[0])
        # gene length << contig length; 800 is the allowance for flank regions
        if gene_length+800 > contig_length:
            continue
        pointer=x[0].split("___")[1].strip()  # contig with a potential merge event
        print(pointer)
        for y in Final_list:
            # relatively strict filter; if passed, a second gene sits on this contig
            if pointer in y[0] and y not in final_O and (y[1]>=int(y[0].split("__")[1].split("___")[0])*1.5 or (y[1]>=int(y[0].split("__")[1].split("___")[0])*y[2] and y[1]>=400)):
                potenial_new_gene=y
                print(potenial_new_gene)
                break
    if potenial_new_gene!="":
        print("two differnt genes in same contig, fix it for O antigen")
        final_O.append(potenial_new_gene)
    ### end of the two genes on same contig test
    if len(final_O)==0:
        O_choice="-"
    else:
        O_list=[]
        for x in final_O:
            O_list.append(x[0].split("__")[0])
            # O-1,3,19_not_in_3,10 is too small, which may affect further analysis
            if "O-1,3,19_not_in_3,10__130" not in x[0]:
                O_nodes_list.append(x[0].split("___")[1])
        ### special test for O9,46 and O3,10 family
        if "O-9,46_wbaV" in O_list:
            if "O-9,46_wzy" in O_list:
                O_choice="O-9,46"
            elif "O-9,46,27_partial_wzy" in O_list:
                O_choice="O-9,46,27"
            else:
                # O-9 vs O-2: O-9 is the default and also wins a tie
                O_choice="O-9"
                O2=0
                O9=0
                for z in special_genes:
                    if "tyr-O-9" in z[0]:
                        O9=z[1]
                    elif "tyr-O-2" in z[0]:
                        O2=z[1]
                if O2>O9:
                    O_choice="O-2"
        elif ("O-3,10_wzx" in O_list) and ("O-9,46_wzy" in O_list):
            if "O-3,10_not_in_1,3,19" in O_list:
                O_choice="O-3,10"
            else:
                O_choice="O-1,3,19"
        ### end of special test for O9,46 and O3,10 family
        else:
            # otherwise take the O hit with the highest score (on ties the later hit)
            try:
                max_score=0
                for x in final_O:
                    if x[1]>=max_score:
                        max_score=x[1]
                        O_choice=x[0].split("_")[0]
                # "O-1,3,19" is replaced by the second hit's type; with a single
                # hit there is no second entry and the choice is left as it is
                if O_choice=="O-1,3,19" and len(final_O)>1:
                    O_choice=final_O[1][0].split("_")[0]
            except (IndexError, TypeError, AttributeError):
                # best effort: a malformed entry keeps whatever O_choice was reached
                pass
    return O_choice,O_nodes_list,special_genes,final_O
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
309 |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
310 def seqsero_from_formula_to_serotypes(Otype,fliC,fljB,special_gene_list): |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
311 #like test_output_06012017.txt |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
312 #can add more varialbles like sdf-type, sub-species-type in future (we can conclude it into a special-gene-list) |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
313 from Initial_Conditions import phase1 |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
314 from Initial_Conditions import phase2 |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
315 from Initial_Conditions import phaseO |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
316 from Initial_Conditions import sero |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
317 seronames=[] |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
318 for i in range(len(phase1)): |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
319 fliC_combine=[] |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
320 fljB_combine=[] |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
321 if phaseO[i]==Otype: |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
322 ### for fliC, detect every possible combinations to avoid the effect of "[" |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
323 if phase1[i].count("[")==0: |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
324 fliC_combine.append(phase1[i]) |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
325 elif phase1[i].count("[")>=1: |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
326 c=[] |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
327 b=[] |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
328 if phase1[i][0]=="[" and phase1[i][-1]=="]" and phase1[i].count("[")==1: |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
329 content=phase1[i].replace("[","").replace("]","") |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
330 fliC_combine.append(content) |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
331 fliC_combine.append("-") |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
332 else: |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
333 for x in phase1[i].split(","): |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
334 if "[" in x: |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
335 b.append(x.replace("[","").replace("]","")) |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
336 else: |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
337 c.append(x) |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
338 fliC_combine=Combine(b,c) #Combine will offer every possible combinations of the formula, like f,[g],t: f,t f,g,t |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
339 ### end of fliC "[" detect |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
340 ### for fljB, detect every possible combinations to avoid the effect of "[" |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
341 if phase2[i].count("[")==0: |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
342 fljB_combine.append(phase2[i]) |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
343 elif phase2[i].count("[")>=1: |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
344 d=[] |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
345 e=[] |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
346 if phase2[i][0]=="[" and phase2[i][-1]=="]" and phase2[i].count("[")==1: |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
347 content=phase2[i].replace("[","").replace("]","") |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
348 fljB_combine.append(content) |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
349 fljB_combine.append("-") |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
350 else: |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
351 for x in phase2[i].split(","): |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
352 if "[" in x: |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
353 d.append(x.replace("[","").replace("]","")) |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
354 else: |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
355 e.append(x) |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
356 fljB_combine=Combine(d,e) |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
357 ### end of fljB "[" detect |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
358 new_fliC=fliC.split(",") #because some antigen like r,[i] not follow alphabetical order, so use this one to judge and can avoid missings |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
359 new_fliC.sort() |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
360 new_fliC=",".join(new_fliC) |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
361 new_fljB=fljB.split(",") |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
362 new_fljB.sort() |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
363 new_fljB=",".join(new_fljB) |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
364 if (new_fliC in fliC_combine or fliC in fliC_combine) and (new_fljB in fljB_combine or fljB in fljB_combine): |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
365 seronames.append(sero[i]) |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
366 #analyze seronames |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
367 if len(seronames)==0: |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
368 seronames=["N/A (The predicted antigenic profile does not exist in the White-Kauffmann-Le Minor scheme)"] |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
369 star="" |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
370 star_line="" |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
371 if len(seronames)>1:#there are two possible predictions for serotypes |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
372 star="*" |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
373 star_line="The predicted serotypes share the same general formula:\t"+Otype+":"+fliC+":"+fljB+"\n"## |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
374 print "\n" |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
375 predict_form=Otype+":"+fliC+":"+fljB# |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
376 predict_sero=(" or ").join(seronames) |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
377 ###special test for Enteritidis |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
378 if predict_form=="9:g,m:-": |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
379 sdf="-" |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
380 for x in special_gene_list: |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
381 if x[0].startswith("sdf"): |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
382 sdf="+" |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
383 predict_form=predict_form+"\nSdf prediction:"+sdf |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
384 if sdf=="-": |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
385 star="*" |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
386 star_line="Additional characterization is necessary to assign a serotype to this strain. Commonly circulating strains of serotype Enteritidis are sdf+, although sdf- strains of serotype Enteritidis are known to exist. Serotype Gallinarum is typically sdf- but should be quite rare. Sdf- strains of serotype Enteritidis and serotype Gallinarum can be differentiated by phenotypic profile or genetic criteria.\n"#+## |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
387 predict_sero="See comments below" |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
388 ###end of special test for Enteritidis |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
389 elif predict_form=="4:i:-": |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
390 predict_sero="potential monophasic variant of Typhimurium" |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
391 elif predict_form=="4:r:-": |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
392 predict_sero="potential monophasic variant of Heidelberg" |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
393 elif predict_form=="4:b:-": |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
394 predict_sero="potential monophasic variant of Paratyphi B" |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
395 elif predict_form=="8:e,h:1,2": |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
396 predict_sero="Newport" |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
397 star="*" |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
398 star_line="Serotype Bardo shares the same antigenic profile with Newport, but Bardo is exceedingly rare." |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
399 claim="The serotype(s) is/are the only serotype(s) with the indicated antigenic profile currently recognized in the Kauffmann White Scheme. New serotypes can emerge and the possibility exists that this antigenic profile may emerge in a different subspecies. Identification of strains to the subspecies level should accompany serotype determination; the same antigenic profile in different subspecies is considered different serotypes."## |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
400 if "N/A" in predict_sero: |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
401 claim="" |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
402 if "Typhimurium" in predict_sero or predict_form=="4:i:-": |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
403 normal=0 |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
404 mutation=0 |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
405 for x in special_gene_list: |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
406 if "oafA-O-4_full" in x[0]: |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
407 normal=x[1] |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
408 elif "oafA-O-4_5-" in x[0]: |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
409 mutation=x[1] |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
410 if normal>mutation: |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
411 #print "$$$Typhimurium" |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
412 pass |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
413 elif normal<mutation: |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
414 predict_sero=predict_sero.strip()+"(O5-)" |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
415 star="*"# |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
416 star_line="Detected the deletion of O5-." |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
417 #print "$$$Typhimurium_O5-" |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
418 else: |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
419 #print "$$$Typhimurium, even no 7 bases difference" |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
420 pass |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
421 return predict_form,predict_sero,star,star_line,claim |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
422 |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
def main():
    """Run the hybrid mapping + assembly serotyping workflow for one read pair.

    Positional command-line arguments (read from ``sys.argv``):
        1. database     -- FASTA of reference alleles; indexed with ``bwa`` for
                           read extraction and reused as the ``blastn`` query.
        2. mapping_mode -- ``"mem"`` (bwa mem) or ``"sam"`` (bwa aln + sampe).
        3. threads      -- integer thread count for bwa/samtools; SPAdes is
                           given at most 4.
        4. for_fq       -- forward reads file.
        5. rev_fq       -- reverse reads file.

    Side effects: runs bwa, samtools, bamToFastq, spades.py, makeblastdb and
    blastn through the shell; creates intermediate files named after
    ``for_fq``/``rev_fq``; appends tool output to ``data_log.txt``; appends
    contig details to ``SeqSero_hybrid_assembly_log.txt``; writes the final
    report to ``Seqsero_result.txt`` and echoes it to stdout.

    Exits via ``sys.exit`` with a message when the arguments are unusable or
    the samtools version cannot be determined.

    NOTE(review): the indentation of this function was reconstructed from a
    flattened listing; the nesting of the H-contig reporting loop (marked
    below) should be confirmed against the repository copy.
    """
    # Function-scope import keeps the file's import block untouched.
    try:
        from shlex import quote  # Python 3
    except ImportError:
        from pipes import quote  # Python 2

    def say(*parts):
        # Same output as the Python 2 statement ``print a, b, c``: each item
        # is str()-converted and the items are separated by single spaces.
        print(" ".join(str(part) for part in parts))

    # ---- argument handling: fail fast, before any long-running tool starts
    if len(sys.argv) < 6:
        sys.exit("usage: " + sys.argv[0] +
                 " <database> <mapping_mode: mem|sam> <threads> <for_fq> <rev_fq>")
    database, mapping_mode, threads, for_fq, rev_fq = sys.argv[1:6]
    if mapping_mode not in ("mem", "sam"):
        # Without this check no SAM file is produced and every later step
        # fails in a confusing way.
        sys.exit("unsupported mapping_mode %r (expected 'mem' or 'sam')" % mapping_mode)
    try:
        thread_count = int(threads)
    except ValueError:
        sys.exit("threads must be an integer, got %r" % threads)
    threads = str(thread_count)

    current_time = time.strftime("%Y_%m_%d_%H_%M_%S", time.localtime())
    sam = for_fq + ".sam"
    bam = for_fq + ".bam"
    sorted_bam = for_fq + "_sorted.bam"
    mapped_fq1 = for_fq + "_mapped.fq"
    mapped_fq2 = rev_fq + "_mapped.fq"
    combined_fq = for_fq + "_combined.fq"
    for_sai = for_fq + ".sai"
    rev_sai = rev_fq + ".sai"

    # Shell-quoted forms, so paths containing spaces or metacharacters survive
    # being spliced into the os.system() command strings.
    q_db, q_for, q_rev = quote(database), quote(for_fq), quote(rev_fq)
    q_sam, q_bam, q_sorted = quote(sam), quote(bam), quote(sorted_bam)
    q_fq1, q_fq2, q_comb = quote(mapped_fq1), quote(mapped_fq2), quote(combined_fq)
    q_for_sai, q_rev_sai = quote(for_sai), quote(rev_sai)

    # ---- mapping
    print("building database...")
    os.system("bwa index " + q_db + " 2>> data_log.txt ")
    print("mapping...")
    if mapping_mode == "mem":
        os.system("bwa mem -t " + threads + " " + q_db + " " + q_for + " " + q_rev +
                  " > " + q_sam + " 2>> data_log.txt")
    else:  # "sam": bwa aln on each mate, then bwa sampe to pair them
        os.system("bwa aln -t " + threads + " " + q_db + " " + q_for +
                  " > " + q_for_sai + " 2>> data_log.txt")
        os.system("bwa aln -t " + threads + " " + q_db + " " + q_rev +
                  " > " + q_rev_sai + " 2>> data_log.txt")
        os.system("bwa sampe " + q_db + " " + q_for_sai + " " + q_rev_sai + " " +
                  q_for + " " + q_rev + " > " + q_sam + " 2>> data_log.txt")
    # Filter with -F 4 into a BAM, then overwrite the SAM with the filtered set.
    os.system("samtools view -@ " + threads + " -F 4 -Sbh " + q_sam + " > " + q_bam)
    os.system("samtools view -@ " + threads + " -h -o " + q_sam + " " + q_bam)

    # ---- samtools changed the `sort` calling convention after 1.2, so the
    # version is read from the usage text that a bare `samtools` prints on stderr.
    samtools_version = subprocess.Popen(["samtools"], stdout=subprocess.PIPE,
                                        stderr=subprocess.PIPE,
                                        universal_newlines=True)
    _, err = samtools_version.communicate()
    if "ersion:" not in err:
        sys.exit("could not determine the samtools version from its usage output")
    version = err.split("ersion:")[1].strip().split(" ")[0].strip()
    say("check samtools version:", version)
    if LooseVersion(version) <= LooseVersion("1.2"):
        # Old syntax: the last argument is an output prefix (".bam" is appended).
        os.system("samtools sort -@ " + threads + " -n " + q_bam + " " +
                  quote(for_fq + "_sorted"))
    else:
        os.system("samtools sort -@ " + threads + " -n " + q_bam + " >" + q_sorted)

    # ---- extract the mapped reads, once combined and once as a mate pair
    os.system("bamToFastq -i " + q_sorted + " -fq " + q_comb)
    os.system("bamToFastq -i " + q_sorted + " -fq " + q_fq1 + " -fq2 " + q_fq2 +
              " 2>> data_log.txt")  # 2> /dev/null if want no output

    # ---- assembly
    outdir = current_time + "_temp"
    print("assembling...")
    t = "4" if thread_count > 4 else threads  # never give SPAdes more than 4 threads
    os.system("spades.py --careful --pe1-s " + q_comb + " --pe1-1 " + q_fq1 +
              " --pe1-2 " + q_fq2 + " -t " + t + " -o " + quote(outdir) +
              " >> data_log.txt 2>&1")
    new_fasta = for_fq + "_" + database + "_" + mapping_mode + ".fasta"
    os.system("mv " + quote(outdir + "/contigs.fasta") + " " + quote(new_fasta) +
              " 2> /dev/null")
    os.system("rm -rf " + quote(outdir) + " 2> /dev/null")

    # ---- BLAST the allele database against the assembled contigs
    print("blasting...")
    print("\n")
    xmlfile = for_fq + "-extracted_vs_" + database + "_" + mapping_mode + ".xml"
    # Tool chatter goes to data_log.txt so it does not interleave with our output.
    os.system("makeblastdb -in " + quote(new_fasta) + " -out " + quote(new_fasta + "_db") +
              " -dbtype nucl >> data_log.txt 2>&1")
    os.system("blastn -word_size 10 -query " + q_db + " -db " + quote(new_fasta + "_db") +
              " -out " + quote(xmlfile) + " -outfmt 5 >> data_log.txt 2>&1")
    Final_list = xml_parse_score_comparision_seqsero(xmlfile)
    # Keep hits whose "_cov_" value in the name is >= 3.5 and whose score x[1]
    # reaches at least one of the two integer thresholds embedded in the name.
    Final_list_passed = [x for x in Final_list if float(x[0].split("_cov_")[1])>=3.5 and (x[1]>=int(x[0].split("__")[1]) or x[1]>=int(x[0].split("___")[1].split("_")[3]))]

    fliC_choice = "-"
    fljB_choice = "-"
    fliC_contig = "NA"
    fljB_contig = "NA"
    fliC_length = 0  # can be changed to coverage in future
    fljB_length = 0  # can be changed to coverage in future
    # decide the O antigen type and also get the special-gene list for later checks
    O_choice, O_nodes, special_gene_list, O_nodes_roles = decide_O_type_and_get_special_genes(Final_list)
    O_choice = O_choice.split("-")[-1].strip()
    # decide whether each H antigen contig is fliC or fljB
    H_contig_roles = decide_contig_roles_for_H_antigen(Final_list)

    # ---- report contigs to stdout and to the (appended) assembly log;
    # the context manager guarantees the log is flushed and closed.
    with open("SeqSero_hybrid_assembly_log.txt", "a") as log_file:
        print("O_contigs:")
        log_file.write("O_contigs:\n")
        for x in O_nodes_roles:
            if "O-1,3,19_not_in_3,10" not in x[0]:  # O-1,3,19_not_in_3,10 is just a small size marker
                pct = str(round(x[2]*100, 2)) + "%"
                say(x[0].split("___")[-1], x[0].split("__")[0], "blast score:", x[1], "identity%:", pct)
                # A space now precedes "identity%:", matching the H-contig log lines.
                log_file.write(x[0].split("___")[-1] + " " + x[0].split("__")[0] + " " +
                               "blast score: " + str(x[1]) + " identity%:" + pct + "\n")
        print("H_contigs:")
        log_file.write("H_contigs:\n")
        # NOTE(review): reconstructed nesting -- the search over the passed hits
        # is assumed to stop after the first hit matching this contig/role,
        # whichever branch handled it. Confirm against the repository copy.
        for i, x in enumerate(H_contig_roles):
            for y in Final_list_passed:
                if x[1] in y[0] and y[0].startswith(x[0]):
                    if "first" in y[0] or "last" in y[0]:
                        # final filter: a "first"/"last" allele cannot be the
                        # prediction, so rescan and report the contig as undecided
                        # (it may well be a third-phase allele)
                        for z in Final_list_passed:
                            if x[1] in z[0]:
                                pct = str(round(z[2]*100, 2)) + "%"
                                say(x[1], "can't_decide_fliC_or_fljB", z[0].split("_")[1],
                                    "blast_score:", z[1], "identity%:", pct)
                                log_file.write(x[1] + " " + x[0] + " " + z[0].split("_")[1] + " " +
                                               "blast_score: " + str(z[1]) + " identity%:" + pct + "\n")
                                H_contig_roles[i] = "can't decide fliC or fljB, may be third phase"
                                break
                    else:
                        pct = str(round(y[2]*100, 2)) + "%"
                        say(x[1], x[0], y[0].split("_")[1], "blast_score:", y[1], "identity%:", pct)
                        log_file.write(x[1] + " " + x[0] + " " + y[0].split("_")[1] + " " +
                                       "blast_score: " + str(y[1]) + " identity%:" + pct + "\n")
                    break

    # ---- choose the fliC / fljB contigs
    for x in H_contig_roles:
        # if multiple choices, temporarily select the longest one; contigs already
        # used for the O type are excluded so they cannot influence the H call
        if "fliC" == x[0] and int(x[1].split("_")[3]) >= fliC_length and x[1] not in O_nodes:
            fliC_contig = x[1]
            fliC_length = int(x[1].split("_")[3])
        elif "fljB" == x[0] and int(x[1].split("_")[3]) >= fljB_length and x[1] not in O_nodes:
            fljB_contig = x[1]
            fljB_length = int(x[1].split("_")[3])
    # First passing hit on each chosen contig supplies the allele name.
    for x in Final_list_passed:
        if fliC_choice == "-" and "fliC_" in x[0] and fliC_contig in x[0]:
            fliC_choice = x[0].split("_")[1]
        elif fljB_choice == "-" and "fljB_" in x[0] and fljB_contig in x[0]:
            fljB_choice = x[0].split("_")[1]
        elif fliC_choice != "-" and fljB_choice != "-":
            break

    print("\n")
    say("SeqSero Input files:", for_fq, rev_fq)
    say("Most possible O antigen:", O_choice)
    say("Most possible H1 antigen:", fliC_choice)
    say("Most possible H2 antigen:", fljB_choice)

    # ---- output
    predict_form, predict_sero, star, star_line, claim = seqsero_from_formula_to_serotypes(
        O_choice, fliC_choice, fljB_choice, special_gene_list)
    report = ("Input files:\t" + for_fq + " " + rev_fq + "\n" +
              "O antigen prediction:\t" + "O-" + O_choice + "\n" +
              "H1 antigen prediction(fliC):\t" + fliC_choice + "\n" +
              "H2 antigen prediction(fljB):\t" + fljB_choice + "\n" +
              "Predicted antigenic profile:\t" + predict_form + "\n" +
              "Predicted serotype(s):\t" + predict_sero + star + "\n" +
              star + star_line + claim + "\n")
    with open("Seqsero_result.txt", "w") as new_file:
        new_file.write(report)
    os.system("cat Seqsero_result.txt")
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
544 |
6275272ebcbc
planemo upload commit 9b152b4a900a8cd70df992da881c7e3fa00d4e4c-dirty
cstrittmatter
parents:
diff
changeset
|
# Script entry point: run the workflow only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()