Mercurial > repos > estrain > seqsero_v1
comparison SeqSero/libs/H_combination_output_analysis.py @ 0:c577b57b7c74 draft
Uploaded
author | estrain |
---|---|
date | Wed, 06 Dec 2017 15:59:29 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:c577b57b7c74 |
---|---|
1 #!/usr/bin/env python | |
2 # "H_combination_output_analysis.py target.fasta fliCdatabase.fasta fljBdatabase.fasta" | |
3 # must have ispcr and primers of fliC and fljB at the same directory | |
4 | |
5 | |
6 import os | |
7 from Bio import SeqIO | |
8 import sys | |
9 from Bio.Blast import NCBIXML | |
10 from Initial_Conditions import phase1 | |
11 from Initial_Conditions import phase2 | |
12 | |
13 | |
14 | |
15 target=sys.argv[1] | |
16 database_fliC=sys.argv[2] | |
17 database_fljB=sys.argv[3] | |
18 output=target.split('.')[0]+'_out.fasta' | |
19 | |
20 dirpath = os.path.abspath(os.path.dirname(os.path.realpath(__file__)))###01/27/2015 | |
21 database_path="database"###01/27/2015,database_path=dirpath+"/database" | |
22 os.system(dirpath+'/isPcr maxSize=3000 tileSize=7 minPerfect=7 minGood=7 '+target+' '+dirpath+'/../primers/seq_primer_fliC.txt '+target+'_fliC.fa') | |
23 os.system(dirpath+'/isPcr maxSize=3000 tileSize=7 minPerfect=7 minGood=7 '+target+' '+dirpath+'/../primers/seq_primer_fljB.txt '+target+'_fljB.fa') | |
24 fliC=target+'_fliC.fa' | |
25 fljB=target+'_fljB.fa' | |
26 | |
27 if os.path.getsize(fliC)>10: | |
28 os.system('makeblastdb -in '+database_fliC+' -out '+database_fliC+'_db '+'-dbtype prot')###01/28/2015,no need to add fljB address, because input is abs address already | |
29 os.system('blastx -seg=no -query '+fliC+' -db '+database_fliC+'_db '+'-out '+'FliC_Htype_'+target+'.xml '+'-outfmt 5') | |
30 print target | |
31 fliC_XML='FliC_Htype_'+target+'.xml' | |
32 fliC_handle=open(fliC_XML) | |
33 records=NCBIXML.parse(fliC_handle) | |
34 fliC_records=list(records) | |
35 E_thresh=1e-10 | |
36 hspbit=[] | |
37 alignmentlist=[] | |
38 for record in fliC_records: | |
39 for alignment in record.alignments: | |
40 hsp_bit_score=0 | |
41 startlist=[]#the percentage algorithm don't consider one situation, the new hsp cover old hsp | |
42 endlist=[]# | |
43 for hsp in alignment.hsps: | |
44 start=hsp.query_start# | |
45 end=hsp.query_end# | |
46 leng=abs(start-end)# | |
47 if hsp.expect<E_thresh:# | |
48 if start>end:# | |
49 temp=start# | |
50 start=end# | |
51 end=start# | |
52 if len(startlist)==0:# | |
53 hsp_bit_score=hsp_bit_score+hsp.bits# | |
54 startlist.append(start)# | |
55 endlist.append(end)# | |
56 else:# | |
57 for i in range(len(startlist)):# | |
58 if startlist[i]<start<endlist[i]:# | |
59 start=endlist[i]+1# | |
60 if startlist[i]<end<endlist[i]:#03112016 | |
61 end=startlist[i]-1# | |
62 if end<start:#the new hsp was included in old hsp# | |
63 percentage=0# | |
64 else: | |
65 percentage=float(end-start)/leng# | |
66 startlist.append(start)# | |
67 endlist.append(end)# | |
68 hsp_bit_score=hsp_bit_score+percentage*hsp.bits# | |
69 alignment=alignment.hit_def+':'+str(hsp_bit_score) | |
70 hspbit.append(hsp_bit_score) | |
71 alignmentlist.append(alignment) | |
72 scorelist=dict(zip(alignmentlist,hspbit)) | |
73 score=0 | |
74 serotype=[] | |
75 seroscore=[] | |
76 for Htype in scorelist: | |
77 if scorelist[Htype]>score: | |
78 First_Choice=Htype | |
79 score=scorelist[Htype] | |
80 if locals().has_key('First_Choice'): | |
81 serotype.append(First_Choice.split("__")[0]) | |
82 seroscore.append(score) | |
83 secscore=0 | |
84 for Htype in scorelist: | |
85 | |
86 if scorelist[Htype]>secscore and (Htype.split("__")[0] not in serotype): | |
87 Sec_Choice=Htype | |
88 secscore=scorelist[Htype] | |
89 if locals().has_key('Sec_Choice'): | |
90 serotype.append(Sec_Choice.split("__")[0]) | |
91 seroscore.append(secscore) | |
92 thirdscore=0 | |
93 for Htype in scorelist: | |
94 if scorelist[Htype]>thirdscore and (Htype.split("__")[0] not in serotype): | |
95 Third_Choice=Htype | |
96 thirdscore=scorelist[Htype] | |
97 if locals().has_key('Third_Choice'): | |
98 serotype.append(Third_Choice.split("__")[0]) | |
99 seroscore.append(thirdscore) | |
100 print serotype,seroscore | |
101 if score>100: | |
102 print '#',target,'$$$ Most possible H_fliC_type: ',First_Choice,'\n' | |
103 print '$$$ bit_score:',score,'\n' | |
104 if locals().has_key('secscore'): | |
105 if secscore>100: | |
106 print '#',target,'$$$ Second possible H_fliC_type: ',Sec_Choice,'\n' | |
107 print '$$$ Second bit_score:',secscore,'\n' | |
108 if locals().has_key('thirdscore'): | |
109 if thirdscore>100: | |
110 print '#',target,'$$$ Third possible H_fliC_type: ',Third_Choice,'\n' | |
111 print '$$$ Third bit_score:',thirdscore,'\n' | |
112 else: | |
113 print '$$$ No fliC in',target | |
114 else: | |
115 score=1 | |
116 print '$$$ No fliC (no file created) in',target | |
117 | |
118 | |
119 | |
120 if os.path.getsize(fljB)>10: | |
121 os.system('makeblastdb -in '+database_fljB+' -out '+database_fljB+'_db '+'-dbtype prot')###01/28/2015,no need to add fljB address, because input is abs address already | |
122 os.system('blastx -query '+fljB+' -db '+database_fljB+'_db '+'-out '+'FljB_Htype_'+target+'.xml '+'-outfmt 5') | |
123 print target | |
124 fljB_XML='FljB_Htype_'+target+'.xml' | |
125 fljB_handle=open(fljB_XML) | |
126 records=NCBIXML.parse(fljB_handle) | |
127 fljB_records=list(records) | |
128 E_thresh=1e-10 | |
129 hspbit=[] | |
130 alignmentlist=[] | |
131 for record in fljB_records: | |
132 for alignment in record.alignments: | |
133 hsp_bit_score=0 | |
134 startlist=[]# | |
135 endlist=[]# | |
136 for hsp in alignment.hsps: | |
137 start=hsp.query_start# | |
138 end=hsp.query_end# | |
139 leng=abs(start-end)# | |
140 if hsp.expect<E_thresh:# | |
141 if start>end:# | |
142 temp=start# | |
143 start=end# | |
144 end=start# | |
145 if len(startlist)==0:# | |
146 hsp_bit_score=hsp_bit_score+hsp.bits# | |
147 startlist.append(start)# | |
148 endlist.append(end)# | |
149 else:# | |
150 for i in range(len(startlist)):# | |
151 if startlist[i]<start<endlist[i]:# | |
152 start=endlist[i]+1# | |
153 if startlist[i]<end<endlist[i]:#03112016 | |
154 end=startlist[i]-1# | |
155 if end<start:#the new hsp was included in old hsp# | |
156 percentage=0# | |
157 else: | |
158 percentage=float(end-start)/leng# | |
159 startlist.append(start)# | |
160 endlist.append(end)# | |
161 hsp_bit_score=hsp_bit_score+percentage*hsp.bits# | |
162 alignment=alignment.hit_def+':'+str(hsp_bit_score) | |
163 hspbit.append(hsp_bit_score) | |
164 alignmentlist.append(alignment) | |
165 fljB_scorelist=dict(zip(alignmentlist,hspbit)) | |
166 | |
167 fljB_score=0 | |
168 fljB_serotype=[] | |
169 fljB_seroscore=[] | |
170 for Htype in fljB_scorelist: | |
171 if fljB_scorelist[Htype]>fljB_score: | |
172 fljB_First_Choice=Htype | |
173 fljB_score=fljB_scorelist[Htype] | |
174 if locals().has_key('fljB_First_Choice'): | |
175 fljB_serotype.append(fljB_First_Choice.split("__")[0]) | |
176 fljB_seroscore.append(fljB_score) | |
177 fljB_secscore=0 | |
178 for Htype in fljB_scorelist: | |
179 if fljB_scorelist[Htype]>fljB_secscore and (Htype.split("__")[0] not in fljB_serotype): | |
180 fljB_Sec_Choice=Htype | |
181 fljB_secscore=fljB_scorelist[Htype] | |
182 if locals().has_key('fljB_Sec_Choice'): | |
183 fljB_serotype.append(fljB_Sec_Choice.split("__")[0]) | |
184 fljB_seroscore.append(fljB_secscore) | |
185 fljB_thirdscore=0 | |
186 for Htype in fljB_scorelist: | |
187 if fljB_scorelist[Htype]>fljB_thirdscore and (Htype.split("__")[0] not in fljB_serotype): | |
188 fljB_Third_Choice=Htype | |
189 fljB_thirdscore=fljB_scorelist[Htype] | |
190 if locals().has_key('fljB_Third_Choice'): | |
191 fljB_serotype.append(fljB_Third_Choice.split("__")[0]) | |
192 fljB_seroscore.append(fljB_thirdscore) | |
193 | |
194 if fljB_score>100: | |
195 print '#',target,'$$$ Most possible H_fljB_type: ',fljB_First_Choice,'\n' | |
196 print '$$$ Most bit_score:',fljB_score,'\n' | |
197 if locals().has_key('fljB_secscore'): | |
198 if fljB_secscore>100: | |
199 print '#',target,'$$$ Second possible H_fljB_type: ',fljB_Sec_Choice,'\n' | |
200 print '$$$ Second bit_score:',fljB_secscore,'\n' | |
201 if locals().has_key('fljB_thirdscore'): | |
202 if fljB_thirdscore>100: | |
203 print '#',target,'$$$ Third possible H_fljB_type: ',fljB_Third_Choice,'\n' | |
204 print '$$$ Third bit_score:',fljB_thirdscore,'\n' | |
205 else: | |
206 print '$$$ No fljB in',target | |
207 else: | |
208 fljB_score=1 | |
209 print '$$$ No fljB (no file created) in',target | |
210 | |
211 | |
212 if score>100 and fljB_score>100: | |
213 fliC_sero=dict(zip(serotype,seroscore)) | |
214 fljB_sero=dict(zip(fljB_serotype,fljB_seroscore)) | |
215 combination=[] | |
216 combination_score=[] | |
217 for seroname in fliC_sero: | |
218 for fljB_seroname in fljB_sero: | |
219 for i in range(len(phase1)): | |
220 if phase1[i]==seroname and phase2[i]==fljB_seroname: | |
221 name=seroname+"_"+fljB_seroname | |
222 score=fliC_sero[seroname]+fljB_sero[fljB_seroname] | |
223 combination.append(name) | |
224 combination_score.append(score) | |
225 combinationlist=dict(zip(combination,combination_score)) #we can do the filteration here | |
226 final_dict=sorted(combinationlist.iteritems(), key=lambda d:d[1], reverse = True) | |
227 print "$$_H:Order:",final_dict | |
228 elif score>100 and fljB_score<100: | |
229 print "$$_H:No fljB, only fliC, and its order:",First_Choice,Sec_Choice,Third_Choice | |
230 elif score<100 and fljB_score>100: | |
231 print "$$_H:No fliC, only fljB, and its order:",fljB_First_Choice,fljB_Sec_Choice,fljB_Third_Choice | |
232 elif score==1 and fljB_score>100: | |
233 print "$$_H:No fliC (file) existed, only fljB, and its order:",fljB_First_Choice,fljB_Sec_Choice,fljB_Third_Choice | |
234 elif score==1 and fljB_score<100: | |
235 print "$$_H:No fliC (file) existed, and no fljB" | |
236 elif score>100 and fljB_score==1: | |
237 print "$$_H:No fljB (file) existed, only fliC, and its order:",First_Choice,Sec_Choice,Third_Choice | |
238 elif score<100 and fljB_score==1: | |
239 print "$$_H:No fljB (file) existed, and no fliC" | |
240 else: | |
241 print "$$_H:No fliC and fljB" | |
242 | |
243 | |
244 ''' | |
245 E_thresh=1e-10 | |
246 hspbit=[] | |
247 alignmentlist=[] | |
248 for record in fliC_records: | |
249 for alignment in record.alignments: | |
250 hsp_bit_score=0 | |
251 for hsp in alignment.hsps: | |
252 if hsp.expect<E_thresh: | |
253 hsp_bit_score=hsp_bit_score+hsp.bits | |
254 alignment=alignment.hit_def+':'+str(hsp_bit_score) | |
255 hspbit.append(hsp_bit_score) | |
256 alignmentlist.append(alignment) | |
257 | |
258 scorelist=dict(zip(alignmentlist,hspbit)) | |
259 score=0 | |
260 for Htype in scorelist: | |
261 if scorelist[Htype]>score: | |
262 First_Choice=Htype | |
263 score=scorelist[Htype] | |
264 if score>100: | |
265 print '#',target,'Most possible H_fliC_type: ',First_Choice,'\n' | |
266 print '#bit_score:',score,'\n' | |
267 else: | |
268 print '#No fliC in',target | |
269 | |
270 | |
271 E_thresh=1e-10 | |
272 hspbit=[] | |
273 alignmentlist=[] | |
274 for record in fljB_records: | |
275 for alignment in record.alignments: | |
276 hsp_bit_score=0 | |
277 for hsp in alignment.hsps: | |
278 if hsp.expect<E_thresh: | |
279 hsp_bit_score=hsp_bit_score+hsp.bits | |
280 alignment=alignment.hit_def+':'+str(hsp_bit_score) | |
281 hspbit.append(hsp_bit_score) | |
282 alignmentlist.append(alignment) | |
283 | |
284 scorelist=dict(zip(alignmentlist,hspbit)) | |
285 fljB_score=0 | |
286 for Htype in scorelist: | |
287 if scorelist[Htype]>fljB_score: | |
288 First_Choice=Htype | |
289 fljB_score=scorelist[Htype] | |
290 if fljB_score>100: | |
291 print '#',target,'Most possible H_fljB_type: ',First_Choice,'\n' | |
292 print '#bit_score:',fljB_score,'\n' | |
293 else: | |
294 print '#No fljB in',target | |
295 ''' | |
296 |