comparison SeqSero/libs/H_combination_output_analysis.py @ 0:c577b57b7c74 draft

Uploaded
author estrain
date Wed, 06 Dec 2017 15:59:29 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:c577b57b7c74
1 #!/usr/bin/env python
2 # "H_combination_output_analysis.py target.fasta fliCdatabase.fasta fljBdatabase.fasta"
3 # must have ispcr and primers of fliC and fljB at the same directory
4
5
6 import os
7 from Bio import SeqIO
8 import sys
9 from Bio.Blast import NCBIXML
10 from Initial_Conditions import phase1
11 from Initial_Conditions import phase2
12
13
14
15 target=sys.argv[1]
16 database_fliC=sys.argv[2]
17 database_fljB=sys.argv[3]
18 output=target.split('.')[0]+'_out.fasta'
19
20 dirpath = os.path.abspath(os.path.dirname(os.path.realpath(__file__)))###01/27/2015
21 database_path="database"###01/27/2015,database_path=dirpath+"/database"
22 os.system(dirpath+'/isPcr maxSize=3000 tileSize=7 minPerfect=7 minGood=7 '+target+' '+dirpath+'/../primers/seq_primer_fliC.txt '+target+'_fliC.fa')
23 os.system(dirpath+'/isPcr maxSize=3000 tileSize=7 minPerfect=7 minGood=7 '+target+' '+dirpath+'/../primers/seq_primer_fljB.txt '+target+'_fljB.fa')
24 fliC=target+'_fliC.fa'
25 fljB=target+'_fljB.fa'
26
27 if os.path.getsize(fliC)>10:
28 os.system('makeblastdb -in '+database_fliC+' -out '+database_fliC+'_db '+'-dbtype prot')###01/28/2015,no need to add fljB address, because input is abs address already
29 os.system('blastx -seg=no -query '+fliC+' -db '+database_fliC+'_db '+'-out '+'FliC_Htype_'+target+'.xml '+'-outfmt 5')
30 print target
31 fliC_XML='FliC_Htype_'+target+'.xml'
32 fliC_handle=open(fliC_XML)
33 records=NCBIXML.parse(fliC_handle)
34 fliC_records=list(records)
35 E_thresh=1e-10
36 hspbit=[]
37 alignmentlist=[]
38 for record in fliC_records:
39 for alignment in record.alignments:
40 hsp_bit_score=0
41 startlist=[]#the percentage algorithm don't consider one situation, the new hsp cover old hsp
42 endlist=[]#
43 for hsp in alignment.hsps:
44 start=hsp.query_start#
45 end=hsp.query_end#
46 leng=abs(start-end)#
47 if hsp.expect<E_thresh:#
48 if start>end:#
49 temp=start#
50 start=end#
51 end=start#
52 if len(startlist)==0:#
53 hsp_bit_score=hsp_bit_score+hsp.bits#
54 startlist.append(start)#
55 endlist.append(end)#
56 else:#
57 for i in range(len(startlist)):#
58 if startlist[i]<start<endlist[i]:#
59 start=endlist[i]+1#
60 if startlist[i]<end<endlist[i]:#03112016
61 end=startlist[i]-1#
62 if end<start:#the new hsp was included in old hsp#
63 percentage=0#
64 else:
65 percentage=float(end-start)/leng#
66 startlist.append(start)#
67 endlist.append(end)#
68 hsp_bit_score=hsp_bit_score+percentage*hsp.bits#
69 alignment=alignment.hit_def+':'+str(hsp_bit_score)
70 hspbit.append(hsp_bit_score)
71 alignmentlist.append(alignment)
72 scorelist=dict(zip(alignmentlist,hspbit))
73 score=0
74 serotype=[]
75 seroscore=[]
76 for Htype in scorelist:
77 if scorelist[Htype]>score:
78 First_Choice=Htype
79 score=scorelist[Htype]
80 if locals().has_key('First_Choice'):
81 serotype.append(First_Choice.split("__")[0])
82 seroscore.append(score)
83 secscore=0
84 for Htype in scorelist:
85
86 if scorelist[Htype]>secscore and (Htype.split("__")[0] not in serotype):
87 Sec_Choice=Htype
88 secscore=scorelist[Htype]
89 if locals().has_key('Sec_Choice'):
90 serotype.append(Sec_Choice.split("__")[0])
91 seroscore.append(secscore)
92 thirdscore=0
93 for Htype in scorelist:
94 if scorelist[Htype]>thirdscore and (Htype.split("__")[0] not in serotype):
95 Third_Choice=Htype
96 thirdscore=scorelist[Htype]
97 if locals().has_key('Third_Choice'):
98 serotype.append(Third_Choice.split("__")[0])
99 seroscore.append(thirdscore)
100 print serotype,seroscore
101 if score>100:
102 print '#',target,'$$$ Most possible H_fliC_type: ',First_Choice,'\n'
103 print '$$$ bit_score:',score,'\n'
104 if locals().has_key('secscore'):
105 if secscore>100:
106 print '#',target,'$$$ Second possible H_fliC_type: ',Sec_Choice,'\n'
107 print '$$$ Second bit_score:',secscore,'\n'
108 if locals().has_key('thirdscore'):
109 if thirdscore>100:
110 print '#',target,'$$$ Third possible H_fliC_type: ',Third_Choice,'\n'
111 print '$$$ Third bit_score:',thirdscore,'\n'
112 else:
113 print '$$$ No fliC in',target
114 else:
115 score=1
116 print '$$$ No fliC (no file created) in',target
117
118
119
120 if os.path.getsize(fljB)>10:
121 os.system('makeblastdb -in '+database_fljB+' -out '+database_fljB+'_db '+'-dbtype prot')###01/28/2015,no need to add fljB address, because input is abs address already
122 os.system('blastx -query '+fljB+' -db '+database_fljB+'_db '+'-out '+'FljB_Htype_'+target+'.xml '+'-outfmt 5')
123 print target
124 fljB_XML='FljB_Htype_'+target+'.xml'
125 fljB_handle=open(fljB_XML)
126 records=NCBIXML.parse(fljB_handle)
127 fljB_records=list(records)
128 E_thresh=1e-10
129 hspbit=[]
130 alignmentlist=[]
131 for record in fljB_records:
132 for alignment in record.alignments:
133 hsp_bit_score=0
134 startlist=[]#
135 endlist=[]#
136 for hsp in alignment.hsps:
137 start=hsp.query_start#
138 end=hsp.query_end#
139 leng=abs(start-end)#
140 if hsp.expect<E_thresh:#
141 if start>end:#
142 temp=start#
143 start=end#
144 end=start#
145 if len(startlist)==0:#
146 hsp_bit_score=hsp_bit_score+hsp.bits#
147 startlist.append(start)#
148 endlist.append(end)#
149 else:#
150 for i in range(len(startlist)):#
151 if startlist[i]<start<endlist[i]:#
152 start=endlist[i]+1#
153 if startlist[i]<end<endlist[i]:#03112016
154 end=startlist[i]-1#
155 if end<start:#the new hsp was included in old hsp#
156 percentage=0#
157 else:
158 percentage=float(end-start)/leng#
159 startlist.append(start)#
160 endlist.append(end)#
161 hsp_bit_score=hsp_bit_score+percentage*hsp.bits#
162 alignment=alignment.hit_def+':'+str(hsp_bit_score)
163 hspbit.append(hsp_bit_score)
164 alignmentlist.append(alignment)
165 fljB_scorelist=dict(zip(alignmentlist,hspbit))
166
167 fljB_score=0
168 fljB_serotype=[]
169 fljB_seroscore=[]
170 for Htype in fljB_scorelist:
171 if fljB_scorelist[Htype]>fljB_score:
172 fljB_First_Choice=Htype
173 fljB_score=fljB_scorelist[Htype]
174 if locals().has_key('fljB_First_Choice'):
175 fljB_serotype.append(fljB_First_Choice.split("__")[0])
176 fljB_seroscore.append(fljB_score)
177 fljB_secscore=0
178 for Htype in fljB_scorelist:
179 if fljB_scorelist[Htype]>fljB_secscore and (Htype.split("__")[0] not in fljB_serotype):
180 fljB_Sec_Choice=Htype
181 fljB_secscore=fljB_scorelist[Htype]
182 if locals().has_key('fljB_Sec_Choice'):
183 fljB_serotype.append(fljB_Sec_Choice.split("__")[0])
184 fljB_seroscore.append(fljB_secscore)
185 fljB_thirdscore=0
186 for Htype in fljB_scorelist:
187 if fljB_scorelist[Htype]>fljB_thirdscore and (Htype.split("__")[0] not in fljB_serotype):
188 fljB_Third_Choice=Htype
189 fljB_thirdscore=fljB_scorelist[Htype]
190 if locals().has_key('fljB_Third_Choice'):
191 fljB_serotype.append(fljB_Third_Choice.split("__")[0])
192 fljB_seroscore.append(fljB_thirdscore)
193
194 if fljB_score>100:
195 print '#',target,'$$$ Most possible H_fljB_type: ',fljB_First_Choice,'\n'
196 print '$$$ Most bit_score:',fljB_score,'\n'
197 if locals().has_key('fljB_secscore'):
198 if fljB_secscore>100:
199 print '#',target,'$$$ Second possible H_fljB_type: ',fljB_Sec_Choice,'\n'
200 print '$$$ Second bit_score:',fljB_secscore,'\n'
201 if locals().has_key('fljB_thirdscore'):
202 if fljB_thirdscore>100:
203 print '#',target,'$$$ Third possible H_fljB_type: ',fljB_Third_Choice,'\n'
204 print '$$$ Third bit_score:',fljB_thirdscore,'\n'
205 else:
206 print '$$$ No fljB in',target
207 else:
208 fljB_score=1
209 print '$$$ No fljB (no file created) in',target
210
211
212 if score>100 and fljB_score>100:
213 fliC_sero=dict(zip(serotype,seroscore))
214 fljB_sero=dict(zip(fljB_serotype,fljB_seroscore))
215 combination=[]
216 combination_score=[]
217 for seroname in fliC_sero:
218 for fljB_seroname in fljB_sero:
219 for i in range(len(phase1)):
220 if phase1[i]==seroname and phase2[i]==fljB_seroname:
221 name=seroname+"_"+fljB_seroname
222 score=fliC_sero[seroname]+fljB_sero[fljB_seroname]
223 combination.append(name)
224 combination_score.append(score)
225 combinationlist=dict(zip(combination,combination_score)) #we can do the filteration here
226 final_dict=sorted(combinationlist.iteritems(), key=lambda d:d[1], reverse = True)
227 print "$$_H:Order:",final_dict
228 elif score>100 and fljB_score<100:
229 print "$$_H:No fljB, only fliC, and its order:",First_Choice,Sec_Choice,Third_Choice
230 elif score<100 and fljB_score>100:
231 print "$$_H:No fliC, only fljB, and its order:",fljB_First_Choice,fljB_Sec_Choice,fljB_Third_Choice
232 elif score==1 and fljB_score>100:
233 print "$$_H:No fliC (file) existed, only fljB, and its order:",fljB_First_Choice,fljB_Sec_Choice,fljB_Third_Choice
234 elif score==1 and fljB_score<100:
235 print "$$_H:No fliC (file) existed, and no fljB"
236 elif score>100 and fljB_score==1:
237 print "$$_H:No fljB (file) existed, only fliC, and its order:",First_Choice,Sec_Choice,Third_Choice
238 elif score<100 and fljB_score==1:
239 print "$$_H:No fljB (file) existed, and no fliC"
240 else:
241 print "$$_H:No fliC and fljB"
242
243
244 '''
245 E_thresh=1e-10
246 hspbit=[]
247 alignmentlist=[]
248 for record in fliC_records:
249 for alignment in record.alignments:
250 hsp_bit_score=0
251 for hsp in alignment.hsps:
252 if hsp.expect<E_thresh:
253 hsp_bit_score=hsp_bit_score+hsp.bits
254 alignment=alignment.hit_def+':'+str(hsp_bit_score)
255 hspbit.append(hsp_bit_score)
256 alignmentlist.append(alignment)
257
258 scorelist=dict(zip(alignmentlist,hspbit))
259 score=0
260 for Htype in scorelist:
261 if scorelist[Htype]>score:
262 First_Choice=Htype
263 score=scorelist[Htype]
264 if score>100:
265 print '#',target,'Most possible H_fliC_type: ',First_Choice,'\n'
266 print '#bit_score:',score,'\n'
267 else:
268 print '#No fliC in',target
269
270
271 E_thresh=1e-10
272 hspbit=[]
273 alignmentlist=[]
274 for record in fljB_records:
275 for alignment in record.alignments:
276 hsp_bit_score=0
277 for hsp in alignment.hsps:
278 if hsp.expect<E_thresh:
279 hsp_bit_score=hsp_bit_score+hsp.bits
280 alignment=alignment.hit_def+':'+str(hsp_bit_score)
281 hspbit.append(hsp_bit_score)
282 alignmentlist.append(alignment)
283
284 scorelist=dict(zip(alignmentlist,hspbit))
285 fljB_score=0
286 for Htype in scorelist:
287 if scorelist[Htype]>fljB_score:
288 First_Choice=Htype
289 fljB_score=scorelist[Htype]
290 if fljB_score>100:
291 print '#',target,'Most possible H_fljB_type: ',First_Choice,'\n'
292 print '#bit_score:',fljB_score,'\n'
293 else:
294 print '#No fljB in',target
295 '''
296