0
|
1 #!/usr/bin/env python
|
|
2 # "H_combination_output_analysis.py target.fasta fliCdatabase.fasta fljBdatabase.fasta"
|
|
3 # must have ispcr and primers of fliC and fljB at the same directory
|
|
4
|
|
5
|
|
6 import os
|
|
7 from Bio import SeqIO
|
|
8 import sys
|
|
9 from Bio.Blast import NCBIXML
|
|
10 from Initial_Conditions import phase1
|
|
11 from Initial_Conditions import phase2
|
|
12
|
|
13
|
|
14
|
|
15 target=sys.argv[1]
|
|
16 database_fliC=sys.argv[2]
|
|
17 database_fljB=sys.argv[3]
|
|
18 output=target.split('.')[0]+'_out.fasta'
|
|
19
|
|
20 dirpath = os.path.abspath(os.path.dirname(os.path.realpath(__file__)))###01/27/2015
|
|
21 database_path="database"###01/27/2015,database_path=dirpath+"/database"
|
|
22 os.system(dirpath+'/isPcr maxSize=3000 tileSize=7 minPerfect=7 minGood=7 '+target+' '+dirpath+'/../primers/seq_primer_fliC.txt '+target+'_fliC.fa')
|
|
23 os.system(dirpath+'/isPcr maxSize=3000 tileSize=7 minPerfect=7 minGood=7 '+target+' '+dirpath+'/../primers/seq_primer_fljB.txt '+target+'_fljB.fa')
|
|
24 fliC=target+'_fliC.fa'
|
|
25 fljB=target+'_fljB.fa'
|
|
26
|
|
27 if os.path.getsize(fliC)>10:
|
|
28 os.system('makeblastdb -in '+database_fliC+' -out '+database_fliC+'_db '+'-dbtype prot')###01/28/2015,no need to add fljB address, because input is abs address already
|
|
29 os.system('blastx -seg=no -query '+fliC+' -db '+database_fliC+'_db '+'-out '+'FliC_Htype_'+target+'.xml '+'-outfmt 5')
|
|
30 print target
|
|
31 fliC_XML='FliC_Htype_'+target+'.xml'
|
|
32 fliC_handle=open(fliC_XML)
|
|
33 records=NCBIXML.parse(fliC_handle)
|
|
34 fliC_records=list(records)
|
|
35 E_thresh=1e-10
|
|
36 hspbit=[]
|
|
37 alignmentlist=[]
|
|
38 for record in fliC_records:
|
|
39 for alignment in record.alignments:
|
|
40 hsp_bit_score=0
|
|
41 startlist=[]#the percentage algorithm don't consider one situation, the new hsp cover old hsp
|
|
42 endlist=[]#
|
|
43 for hsp in alignment.hsps:
|
|
44 start=hsp.query_start#
|
|
45 end=hsp.query_end#
|
|
46 leng=abs(start-end)#
|
|
47 if hsp.expect<E_thresh:#
|
|
48 if start>end:#
|
|
49 temp=start#
|
|
50 start=end#
|
|
51 end=start#
|
|
52 if len(startlist)==0:#
|
|
53 hsp_bit_score=hsp_bit_score+hsp.bits#
|
|
54 startlist.append(start)#
|
|
55 endlist.append(end)#
|
|
56 else:#
|
|
57 for i in range(len(startlist)):#
|
|
58 if startlist[i]<start<endlist[i]:#
|
|
59 start=endlist[i]+1#
|
|
60 if startlist[i]<end<endlist[i]:#03112016
|
|
61 end=startlist[i]-1#
|
|
62 if end<start:#the new hsp was included in old hsp#
|
|
63 percentage=0#
|
|
64 else:
|
|
65 percentage=float(end-start)/leng#
|
|
66 startlist.append(start)#
|
|
67 endlist.append(end)#
|
|
68 hsp_bit_score=hsp_bit_score+percentage*hsp.bits#
|
|
69 alignment=alignment.hit_def+':'+str(hsp_bit_score)
|
|
70 hspbit.append(hsp_bit_score)
|
|
71 alignmentlist.append(alignment)
|
|
72 scorelist=dict(zip(alignmentlist,hspbit))
|
|
73 score=0
|
|
74 serotype=[]
|
|
75 seroscore=[]
|
|
76 for Htype in scorelist:
|
|
77 if scorelist[Htype]>score:
|
|
78 First_Choice=Htype
|
|
79 score=scorelist[Htype]
|
|
80 if locals().has_key('First_Choice'):
|
|
81 serotype.append(First_Choice.split("__")[0])
|
|
82 seroscore.append(score)
|
|
83 secscore=0
|
|
84 for Htype in scorelist:
|
|
85
|
|
86 if scorelist[Htype]>secscore and (Htype.split("__")[0] not in serotype):
|
|
87 Sec_Choice=Htype
|
|
88 secscore=scorelist[Htype]
|
|
89 if locals().has_key('Sec_Choice'):
|
|
90 serotype.append(Sec_Choice.split("__")[0])
|
|
91 seroscore.append(secscore)
|
|
92 thirdscore=0
|
|
93 for Htype in scorelist:
|
|
94 if scorelist[Htype]>thirdscore and (Htype.split("__")[0] not in serotype):
|
|
95 Third_Choice=Htype
|
|
96 thirdscore=scorelist[Htype]
|
|
97 if locals().has_key('Third_Choice'):
|
|
98 serotype.append(Third_Choice.split("__")[0])
|
|
99 seroscore.append(thirdscore)
|
|
100 print serotype,seroscore
|
|
101 if score>100:
|
|
102 print '#',target,'$$$ Most possible H_fliC_type: ',First_Choice,'\n'
|
|
103 print '$$$ bit_score:',score,'\n'
|
|
104 if locals().has_key('secscore'):
|
|
105 if secscore>100:
|
|
106 print '#',target,'$$$ Second possible H_fliC_type: ',Sec_Choice,'\n'
|
|
107 print '$$$ Second bit_score:',secscore,'\n'
|
|
108 if locals().has_key('thirdscore'):
|
|
109 if thirdscore>100:
|
|
110 print '#',target,'$$$ Third possible H_fliC_type: ',Third_Choice,'\n'
|
|
111 print '$$$ Third bit_score:',thirdscore,'\n'
|
|
112 else:
|
|
113 print '$$$ No fliC in',target
|
|
114 else:
|
|
115 score=1
|
|
116 print '$$$ No fliC (no file created) in',target
|
|
117
|
|
118
|
|
119
|
|
120 if os.path.getsize(fljB)>10:
|
|
121 os.system('makeblastdb -in '+database_fljB+' -out '+database_fljB+'_db '+'-dbtype prot')###01/28/2015,no need to add fljB address, because input is abs address already
|
|
122 os.system('blastx -query '+fljB+' -db '+database_fljB+'_db '+'-out '+'FljB_Htype_'+target+'.xml '+'-outfmt 5')
|
|
123 print target
|
|
124 fljB_XML='FljB_Htype_'+target+'.xml'
|
|
125 fljB_handle=open(fljB_XML)
|
|
126 records=NCBIXML.parse(fljB_handle)
|
|
127 fljB_records=list(records)
|
|
128 E_thresh=1e-10
|
|
129 hspbit=[]
|
|
130 alignmentlist=[]
|
|
131 for record in fljB_records:
|
|
132 for alignment in record.alignments:
|
|
133 hsp_bit_score=0
|
|
134 startlist=[]#
|
|
135 endlist=[]#
|
|
136 for hsp in alignment.hsps:
|
|
137 start=hsp.query_start#
|
|
138 end=hsp.query_end#
|
|
139 leng=abs(start-end)#
|
|
140 if hsp.expect<E_thresh:#
|
|
141 if start>end:#
|
|
142 temp=start#
|
|
143 start=end#
|
|
144 end=start#
|
|
145 if len(startlist)==0:#
|
|
146 hsp_bit_score=hsp_bit_score+hsp.bits#
|
|
147 startlist.append(start)#
|
|
148 endlist.append(end)#
|
|
149 else:#
|
|
150 for i in range(len(startlist)):#
|
|
151 if startlist[i]<start<endlist[i]:#
|
|
152 start=endlist[i]+1#
|
|
153 if startlist[i]<end<endlist[i]:#03112016
|
|
154 end=startlist[i]-1#
|
|
155 if end<start:#the new hsp was included in old hsp#
|
|
156 percentage=0#
|
|
157 else:
|
|
158 percentage=float(end-start)/leng#
|
|
159 startlist.append(start)#
|
|
160 endlist.append(end)#
|
|
161 hsp_bit_score=hsp_bit_score+percentage*hsp.bits#
|
|
162 alignment=alignment.hit_def+':'+str(hsp_bit_score)
|
|
163 hspbit.append(hsp_bit_score)
|
|
164 alignmentlist.append(alignment)
|
|
165 fljB_scorelist=dict(zip(alignmentlist,hspbit))
|
|
166
|
|
167 fljB_score=0
|
|
168 fljB_serotype=[]
|
|
169 fljB_seroscore=[]
|
|
170 for Htype in fljB_scorelist:
|
|
171 if fljB_scorelist[Htype]>fljB_score:
|
|
172 fljB_First_Choice=Htype
|
|
173 fljB_score=fljB_scorelist[Htype]
|
|
174 if locals().has_key('fljB_First_Choice'):
|
|
175 fljB_serotype.append(fljB_First_Choice.split("__")[0])
|
|
176 fljB_seroscore.append(fljB_score)
|
|
177 fljB_secscore=0
|
|
178 for Htype in fljB_scorelist:
|
|
179 if fljB_scorelist[Htype]>fljB_secscore and (Htype.split("__")[0] not in fljB_serotype):
|
|
180 fljB_Sec_Choice=Htype
|
|
181 fljB_secscore=fljB_scorelist[Htype]
|
|
182 if locals().has_key('fljB_Sec_Choice'):
|
|
183 fljB_serotype.append(fljB_Sec_Choice.split("__")[0])
|
|
184 fljB_seroscore.append(fljB_secscore)
|
|
185 fljB_thirdscore=0
|
|
186 for Htype in fljB_scorelist:
|
|
187 if fljB_scorelist[Htype]>fljB_thirdscore and (Htype.split("__")[0] not in fljB_serotype):
|
|
188 fljB_Third_Choice=Htype
|
|
189 fljB_thirdscore=fljB_scorelist[Htype]
|
|
190 if locals().has_key('fljB_Third_Choice'):
|
|
191 fljB_serotype.append(fljB_Third_Choice.split("__")[0])
|
|
192 fljB_seroscore.append(fljB_thirdscore)
|
|
193
|
|
194 if fljB_score>100:
|
|
195 print '#',target,'$$$ Most possible H_fljB_type: ',fljB_First_Choice,'\n'
|
|
196 print '$$$ Most bit_score:',fljB_score,'\n'
|
|
197 if locals().has_key('fljB_secscore'):
|
|
198 if fljB_secscore>100:
|
|
199 print '#',target,'$$$ Second possible H_fljB_type: ',fljB_Sec_Choice,'\n'
|
|
200 print '$$$ Second bit_score:',fljB_secscore,'\n'
|
|
201 if locals().has_key('fljB_thirdscore'):
|
|
202 if fljB_thirdscore>100:
|
|
203 print '#',target,'$$$ Third possible H_fljB_type: ',fljB_Third_Choice,'\n'
|
|
204 print '$$$ Third bit_score:',fljB_thirdscore,'\n'
|
|
205 else:
|
|
206 print '$$$ No fljB in',target
|
|
207 else:
|
|
208 fljB_score=1
|
|
209 print '$$$ No fljB (no file created) in',target
|
|
210
|
|
211
|
|
212 if score>100 and fljB_score>100:
|
|
213 fliC_sero=dict(zip(serotype,seroscore))
|
|
214 fljB_sero=dict(zip(fljB_serotype,fljB_seroscore))
|
|
215 combination=[]
|
|
216 combination_score=[]
|
|
217 for seroname in fliC_sero:
|
|
218 for fljB_seroname in fljB_sero:
|
|
219 for i in range(len(phase1)):
|
|
220 if phase1[i]==seroname and phase2[i]==fljB_seroname:
|
|
221 name=seroname+"_"+fljB_seroname
|
|
222 score=fliC_sero[seroname]+fljB_sero[fljB_seroname]
|
|
223 combination.append(name)
|
|
224 combination_score.append(score)
|
|
225 combinationlist=dict(zip(combination,combination_score)) #we can do the filteration here
|
|
226 final_dict=sorted(combinationlist.iteritems(), key=lambda d:d[1], reverse = True)
|
|
227 print "$$_H:Order:",final_dict
|
|
228 elif score>100 and fljB_score<100:
|
|
229 print "$$_H:No fljB, only fliC, and its order:",First_Choice,Sec_Choice,Third_Choice
|
|
230 elif score<100 and fljB_score>100:
|
|
231 print "$$_H:No fliC, only fljB, and its order:",fljB_First_Choice,fljB_Sec_Choice,fljB_Third_Choice
|
|
232 elif score==1 and fljB_score>100:
|
|
233 print "$$_H:No fliC (file) existed, only fljB, and its order:",fljB_First_Choice,fljB_Sec_Choice,fljB_Third_Choice
|
|
234 elif score==1 and fljB_score<100:
|
|
235 print "$$_H:No fliC (file) existed, and no fljB"
|
|
236 elif score>100 and fljB_score==1:
|
|
237 print "$$_H:No fljB (file) existed, only fliC, and its order:",First_Choice,Sec_Choice,Third_Choice
|
|
238 elif score<100 and fljB_score==1:
|
|
239 print "$$_H:No fljB (file) existed, and no fliC"
|
|
240 else:
|
|
241 print "$$_H:No fliC and fljB"
|
|
242
|
|
243
|
|
244 '''
|
|
245 E_thresh=1e-10
|
|
246 hspbit=[]
|
|
247 alignmentlist=[]
|
|
248 for record in fliC_records:
|
|
249 for alignment in record.alignments:
|
|
250 hsp_bit_score=0
|
|
251 for hsp in alignment.hsps:
|
|
252 if hsp.expect<E_thresh:
|
|
253 hsp_bit_score=hsp_bit_score+hsp.bits
|
|
254 alignment=alignment.hit_def+':'+str(hsp_bit_score)
|
|
255 hspbit.append(hsp_bit_score)
|
|
256 alignmentlist.append(alignment)
|
|
257
|
|
258 scorelist=dict(zip(alignmentlist,hspbit))
|
|
259 score=0
|
|
260 for Htype in scorelist:
|
|
261 if scorelist[Htype]>score:
|
|
262 First_Choice=Htype
|
|
263 score=scorelist[Htype]
|
|
264 if score>100:
|
|
265 print '#',target,'Most possible H_fliC_type: ',First_Choice,'\n'
|
|
266 print '#bit_score:',score,'\n'
|
|
267 else:
|
|
268 print '#No fliC in',target
|
|
269
|
|
270
|
|
271 E_thresh=1e-10
|
|
272 hspbit=[]
|
|
273 alignmentlist=[]
|
|
274 for record in fljB_records:
|
|
275 for alignment in record.alignments:
|
|
276 hsp_bit_score=0
|
|
277 for hsp in alignment.hsps:
|
|
278 if hsp.expect<E_thresh:
|
|
279 hsp_bit_score=hsp_bit_score+hsp.bits
|
|
280 alignment=alignment.hit_def+':'+str(hsp_bit_score)
|
|
281 hspbit.append(hsp_bit_score)
|
|
282 alignmentlist.append(alignment)
|
|
283
|
|
284 scorelist=dict(zip(alignmentlist,hspbit))
|
|
285 fljB_score=0
|
|
286 for Htype in scorelist:
|
|
287 if scorelist[Htype]>fljB_score:
|
|
288 First_Choice=Htype
|
|
289 fljB_score=scorelist[Htype]
|
|
290 if fljB_score>100:
|
|
291 print '#',target,'Most possible H_fljB_type: ',First_Choice,'\n'
|
|
292 print '#bit_score:',fljB_score,'\n'
|
|
293 else:
|
|
294 print '#No fljB in',target
|
|
295 '''
|
|
296
|