annotate SeqSero/libs/H_combination_output_analysis.py @ 4:ab0802d77891 draft default tip

Uploaded
author estrain
date Thu, 12 Sep 2019 06:46:00 -0400
parents c577b57b7c74
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
1 #!/usr/bin/env python
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
2 # "H_combination_output_analysis.py target.fasta fliCdatabase.fasta fljBdatabase.fasta"
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
3 # requires the isPcr binary in this script's directory and the fliC/fljB primer files in ../primers (relative to that directory)
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
4
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
5
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
6 import os
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
7 from Bio import SeqIO
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
8 import sys
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
9 from Bio.Blast import NCBIXML
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
10 from Initial_Conditions import phase1
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
11 from Initial_Conditions import phase2
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
12
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
13
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
14
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
# HSPs whose expect value is not below this threshold are ignored.
E_THRESH = 1e-10
# A hit is only reported when its summed bit score exceeds this value.
MIN_BIT_SCORE = 100
# Sentinel "best score" recorded when isPcr produced no usable amplicon file.
NO_AMPLICON_SCORE = 1
# Amplicon files of this size (bytes) or smaller are treated as empty.
MIN_AMPLICON_BYTES = 10


def say(*fields):
    """Write the fields separated by single spaces and end the line.

    This reproduces the layout of the Python 2 statement ``print a, b, c``
    while also being valid Python 3.
    """
    sys.stdout.write(' '.join('%s' % (field,) for field in fields) + '\n')


def alignment_bit_score(hsps, e_thresh=E_THRESH):
    """Return the overlap-corrected sum of bit scores of one alignment's HSPs.

    Each element of *hsps* must expose ``query_start``, ``query_end``,
    ``expect`` and ``bits``.  HSPs whose expect value is not below *e_thresh*
    are skipped.  The first accepted HSP contributes its full bit score; each
    later one is trimmed against the query intervals already recorded and
    contributes ``bits`` scaled by the fraction of its length that remains.

    Known limitation (noted by the original author): a new HSP that
    completely covers an earlier one is not trimmed.
    """
    total = 0
    seen_starts = []
    seen_ends = []
    for hsp in hsps:
        if not hsp.expect < e_thresh:
            continue
        start, end = hsp.query_start, hsp.query_end
        length = abs(start - end)
        if start > end:
            # The original swap assigned ``end = start`` after overwriting
            # ``start``, collapsing the interval; swap both ends properly.
            start, end = end, start
        if not seen_starts:
            total = total + hsp.bits
        else:
            for old_start, old_end in zip(seen_starts, seen_ends):
                if old_start < start < old_end:
                    start = old_end + 1
                if old_start < end < old_end:
                    end = old_start - 1
            if end < start or length == 0:
                # Entirely inside an earlier HSP (or a zero-length HSP, which
                # would otherwise divide by zero): contributes nothing.
                fraction = 0
            else:
                fraction = float(end - start) / length
            total = total + fraction * hsp.bits
        seen_starts.append(start)
        seen_ends.append(end)
    return total


def score_records(records):
    """Map ``"<hit_def>:<score>"`` to the score for every alignment in *records*.

    *records* is an iterable of objects with an ``alignments`` list, each
    alignment having ``hit_def`` and ``hsps``.
    """
    scores = {}
    for record in records:
        for alignment in record.alignments:
            bits = alignment_bit_score(alignment.hsps)
            scores[alignment.hit_def + ':' + str(bits)] = bits
    return scores


def top_choices(scores, how_many=3):
    """Return up to *how_many* ``(name, score)`` pairs, best first.

    Only strictly positive scores qualify.  The text before the first
    ``"__"`` of a name is its type; once a type has been picked, further
    names of the same type are skipped.
    """
    picks = []
    taken_types = []
    for _ in range(how_many):
        best_name = None
        best_score = 0
        for name in scores:
            if scores[name] > best_score and name.split("__")[0] not in taken_types:
                best_name = name
                best_score = scores[name]
        if best_name is None:
            break
        taken_types.append(best_name.split("__")[0])
        picks.append((best_name, best_score))
    return picks


def rank_combinations(fliC_picks, fljB_picks, known_phase1, known_phase2):
    """Pair the picked fliC and fljB types and rank the known pairs.

    *known_phase1* and *known_phase2* are parallel sequences; position ``i``
    describes one accepted (phase 1, phase 2) pair.  Returns a list of
    ``("<fliC type>_<fljB type>", summed score)`` tuples sorted by score,
    highest first.
    """
    fliC_sero = dict((name.split("__")[0], bits) for name, bits in fliC_picks)
    fljB_sero = dict((name.split("__")[0], bits) for name, bits in fljB_picks)
    combined = {}
    for fliC_type in fliC_sero:
        for fljB_type in fljB_sero:
            for first, second in zip(known_phase1, known_phase2):
                if first == fliC_type and second == fljB_type:
                    combined[fliC_type + "_" + fljB_type] = (
                        fliC_sero[fliC_type] + fljB_sero[fljB_type])
    return sorted(combined.items(), key=lambda pair: pair[1], reverse=True)


def type_locus(gene, amplicon, database, target, blastx_options='',
               top_score_label='$$$ bit_score:', echo_ranking=False):
    """BLAST one isPcr amplicon file against its database and report the hits.

    gene            -- ``'fliC'`` or ``'fljB'``; used in messages and file names.
    amplicon        -- path of the isPcr output for this gene.
    database        -- protein FASTA passed to ``makeblastdb``.
    target          -- the input name as given on the command line.
    blastx_options  -- extra text inserted right after ``blastx `` (must end
                       with a space when non-empty).
    top_score_label -- label printed in front of the best bit score.
    echo_ranking    -- also print the picked types and their scores.

    Returns ``(best_score, picks)`` where *picks* is the result of
    ``top_choices``.  When the amplicon file is missing or not larger than
    ``MIN_AMPLICON_BYTES`` the result is ``(NO_AMPLICON_SCORE, [])``.
    """
    # A missing file is reported like an empty one instead of raising OSError.
    if not (os.path.exists(amplicon)
            and os.path.getsize(amplicon) > MIN_AMPLICON_BYTES):
        say('$$$ No ' + gene + ' (no file created) in', target)
        return NO_AMPLICON_SCORE, []

    xml_path = gene[0].upper() + gene[1:] + '_Htype_' + target + '.xml'
    # NOTE(review): these commands are built by plain string concatenation, so
    # paths containing spaces or shell metacharacters will break them.
    os.system('makeblastdb -in ' + database + ' -out ' + database + '_db '
              + '-dbtype prot')
    os.system('blastx ' + blastx_options + '-query ' + amplicon + ' -db '
              + database + '_db ' + '-out ' + xml_path + ' ' + '-outfmt 5')
    say(target)
    with open(xml_path) as handle:
        scores = score_records(NCBIXML.parse(handle))

    picks = top_choices(scores)
    if echo_ranking:
        say([name.split("__")[0] for name, _ in picks],
            [bits for _, bits in picks])

    best_score = picks[0][1] if picks else 0
    if best_score > MIN_BIT_SCORE:
        ordinals = ('Most', 'Second', 'Third')
        score_labels = (top_score_label, '$$$ Second bit_score:',
                        '$$$ Third bit_score:')
        for ordinal, label, (name, bits) in zip(ordinals, score_labels, picks):
            if bits > MIN_BIT_SCORE:
                say('#', target,
                    '$$$ ' + ordinal + ' possible H_' + gene + '_type: ',
                    name, '\n')
                say(label, bits, '\n')
    else:
        say('$$$ No ' + gene + ' in', target)
    return best_score, picks


def main(argv):
    """Run isPcr and blastx for fliC and fljB, then print the H-antigen summary.

    *argv* is ``[script, target.fasta, fliC_database.fasta, fljB_database.fasta]``.
    """
    if len(argv) < 4:
        sys.exit('usage: H_combination_output_analysis.py target.fasta '
                 'fliCdatabase.fasta fljBdatabase.fasta')
    target = argv[1]
    database_fliC = argv[2]
    database_fljB = argv[3]

    dirpath = os.path.abspath(os.path.dirname(os.path.realpath(__file__)))
    for gene in ('fliC', 'fljB'):
        os.system(dirpath + '/isPcr maxSize=3000 tileSize=7 minPerfect=7 minGood=7 '
                  + target + ' ' + dirpath + '/../primers/seq_primer_' + gene
                  + '.txt ' + target + '_' + gene + '.fa')

    # The two loci differ only in the blastx options, the label of the best
    # score and whether the ranking is echoed; those differences are kept.
    score, fliC_picks = type_locus(
        'fliC', target + '_fliC.fa', database_fliC, target,
        blastx_options='-seg=no ', top_score_label='$$$ bit_score:',
        echo_ranking=True)
    fljB_score, fljB_picks = type_locus(
        'fljB', target + '_fljB.fa', database_fljB, target,
        top_score_label='$$$ Most bit_score:')

    # Only the choices that were actually found are listed; the original
    # raised NameError when fewer than three distinct types had scored.
    fliC_names = [name for name, _ in fliC_picks]
    fljB_names = [name for name, _ in fljB_picks]

    # NOTE(review): the branch order is kept exactly as it was.  Because the
    # "< MIN_BIT_SCORE" tests come first, the "(file) existed, only ..."
    # branches can never run, and a score of exactly MIN_BIT_SCORE falls
    # through to the final else.  The message text is presumably parsed by the
    # calling pipeline, so confirm with it before reordering.
    if score > MIN_BIT_SCORE and fljB_score > MIN_BIT_SCORE:
        say("$$_H:Order:",
            rank_combinations(fliC_picks, fljB_picks, phase1, phase2))
    elif score > MIN_BIT_SCORE and fljB_score < MIN_BIT_SCORE:
        say("$$_H:No fljB, only fliC, and its order:", *fliC_names)
    elif score < MIN_BIT_SCORE and fljB_score > MIN_BIT_SCORE:
        say("$$_H:No fliC, only fljB, and its order:", *fljB_names)
    elif score == NO_AMPLICON_SCORE and fljB_score > MIN_BIT_SCORE:
        say("$$_H:No fliC (file) existed, only fljB, and its order:", *fljB_names)
    elif score == NO_AMPLICON_SCORE and fljB_score < MIN_BIT_SCORE:
        say("$$_H:No fliC (file) existed, and no fljB")
    elif score > MIN_BIT_SCORE and fljB_score == NO_AMPLICON_SCORE:
        say("$$_H:No fljB (file) existed, only fliC, and its order:", *fliC_names)
    elif score < MIN_BIT_SCORE and fljB_score == NO_AMPLICON_SCORE:
        say("$$_H:No fljB (file) existed, and no fliC")
    else:
        say("$$_H:No fliC and fljB")


if __name__ == '__main__':
    main(sys.argv)
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
296