Mercurial > repos > estrain > seqsero_v1
comparison SeqSero/libs/H_combination_output_analysis.py @ 0:c577b57b7c74 draft
Uploaded
| author | estrain |
|---|---|
| date | Wed, 06 Dec 2017 15:59:29 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:c577b57b7c74 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 # "H_combination_output_analysis.py target.fasta fliCdatabase.fasta fljBdatabase.fasta" | |
| 3 # must have ispcr and primers of fliC and fljB at the same directory | |
| 4 | |
| 5 | |
| 6 import os | |
| 7 from Bio import SeqIO | |
| 8 import sys | |
| 9 from Bio.Blast import NCBIXML | |
| 10 from Initial_Conditions import phase1 | |
| 11 from Initial_Conditions import phase2 | |
| 12 | |
| 13 | |
| 14 | |
| 15 target=sys.argv[1] | |
| 16 database_fliC=sys.argv[2] | |
| 17 database_fljB=sys.argv[3] | |
| 18 output=target.split('.')[0]+'_out.fasta' | |
| 19 | |
| 20 dirpath = os.path.abspath(os.path.dirname(os.path.realpath(__file__)))###01/27/2015 | |
| 21 database_path="database"###01/27/2015,database_path=dirpath+"/database" | |
| 22 os.system(dirpath+'/isPcr maxSize=3000 tileSize=7 minPerfect=7 minGood=7 '+target+' '+dirpath+'/../primers/seq_primer_fliC.txt '+target+'_fliC.fa') | |
| 23 os.system(dirpath+'/isPcr maxSize=3000 tileSize=7 minPerfect=7 minGood=7 '+target+' '+dirpath+'/../primers/seq_primer_fljB.txt '+target+'_fljB.fa') | |
| 24 fliC=target+'_fliC.fa' | |
| 25 fljB=target+'_fljB.fa' | |
| 26 | |
| 27 if os.path.getsize(fliC)>10: | |
| 28 os.system('makeblastdb -in '+database_fliC+' -out '+database_fliC+'_db '+'-dbtype prot')###01/28/2015,no need to add fljB address, because input is abs address already | |
| 29 os.system('blastx -seg=no -query '+fliC+' -db '+database_fliC+'_db '+'-out '+'FliC_Htype_'+target+'.xml '+'-outfmt 5') | |
| 30 print target | |
| 31 fliC_XML='FliC_Htype_'+target+'.xml' | |
| 32 fliC_handle=open(fliC_XML) | |
| 33 records=NCBIXML.parse(fliC_handle) | |
| 34 fliC_records=list(records) | |
| 35 E_thresh=1e-10 | |
| 36 hspbit=[] | |
| 37 alignmentlist=[] | |
| 38 for record in fliC_records: | |
| 39 for alignment in record.alignments: | |
| 40 hsp_bit_score=0 | |
| 41 startlist=[]#the percentage algorithm don't consider one situation, the new hsp cover old hsp | |
| 42 endlist=[]# | |
| 43 for hsp in alignment.hsps: | |
| 44 start=hsp.query_start# | |
| 45 end=hsp.query_end# | |
| 46 leng=abs(start-end)# | |
| 47 if hsp.expect<E_thresh:# | |
| 48 if start>end:# | |
| 49 temp=start# | |
| 50 start=end# | |
| 51 end=start# | |
| 52 if len(startlist)==0:# | |
| 53 hsp_bit_score=hsp_bit_score+hsp.bits# | |
| 54 startlist.append(start)# | |
| 55 endlist.append(end)# | |
| 56 else:# | |
| 57 for i in range(len(startlist)):# | |
| 58 if startlist[i]<start<endlist[i]:# | |
| 59 start=endlist[i]+1# | |
| 60 if startlist[i]<end<endlist[i]:#03112016 | |
| 61 end=startlist[i]-1# | |
| 62 if end<start:#the new hsp was included in old hsp# | |
| 63 percentage=0# | |
| 64 else: | |
| 65 percentage=float(end-start)/leng# | |
| 66 startlist.append(start)# | |
| 67 endlist.append(end)# | |
| 68 hsp_bit_score=hsp_bit_score+percentage*hsp.bits# | |
| 69 alignment=alignment.hit_def+':'+str(hsp_bit_score) | |
| 70 hspbit.append(hsp_bit_score) | |
| 71 alignmentlist.append(alignment) | |
| 72 scorelist=dict(zip(alignmentlist,hspbit)) | |
| 73 score=0 | |
| 74 serotype=[] | |
| 75 seroscore=[] | |
| 76 for Htype in scorelist: | |
| 77 if scorelist[Htype]>score: | |
| 78 First_Choice=Htype | |
| 79 score=scorelist[Htype] | |
| 80 if locals().has_key('First_Choice'): | |
| 81 serotype.append(First_Choice.split("__")[0]) | |
| 82 seroscore.append(score) | |
| 83 secscore=0 | |
| 84 for Htype in scorelist: | |
| 85 | |
| 86 if scorelist[Htype]>secscore and (Htype.split("__")[0] not in serotype): | |
| 87 Sec_Choice=Htype | |
| 88 secscore=scorelist[Htype] | |
| 89 if locals().has_key('Sec_Choice'): | |
| 90 serotype.append(Sec_Choice.split("__")[0]) | |
| 91 seroscore.append(secscore) | |
| 92 thirdscore=0 | |
| 93 for Htype in scorelist: | |
| 94 if scorelist[Htype]>thirdscore and (Htype.split("__")[0] not in serotype): | |
| 95 Third_Choice=Htype | |
| 96 thirdscore=scorelist[Htype] | |
| 97 if locals().has_key('Third_Choice'): | |
| 98 serotype.append(Third_Choice.split("__")[0]) | |
| 99 seroscore.append(thirdscore) | |
| 100 print serotype,seroscore | |
| 101 if score>100: | |
| 102 print '#',target,'$$$ Most possible H_fliC_type: ',First_Choice,'\n' | |
| 103 print '$$$ bit_score:',score,'\n' | |
| 104 if locals().has_key('secscore'): | |
| 105 if secscore>100: | |
| 106 print '#',target,'$$$ Second possible H_fliC_type: ',Sec_Choice,'\n' | |
| 107 print '$$$ Second bit_score:',secscore,'\n' | |
| 108 if locals().has_key('thirdscore'): | |
| 109 if thirdscore>100: | |
| 110 print '#',target,'$$$ Third possible H_fliC_type: ',Third_Choice,'\n' | |
| 111 print '$$$ Third bit_score:',thirdscore,'\n' | |
| 112 else: | |
| 113 print '$$$ No fliC in',target | |
| 114 else: | |
| 115 score=1 | |
| 116 print '$$$ No fliC (no file created) in',target | |
| 117 | |
| 118 | |
| 119 | |
| 120 if os.path.getsize(fljB)>10: | |
| 121 os.system('makeblastdb -in '+database_fljB+' -out '+database_fljB+'_db '+'-dbtype prot')###01/28/2015,no need to add fljB address, because input is abs address already | |
| 122 os.system('blastx -query '+fljB+' -db '+database_fljB+'_db '+'-out '+'FljB_Htype_'+target+'.xml '+'-outfmt 5') | |
| 123 print target | |
| 124 fljB_XML='FljB_Htype_'+target+'.xml' | |
| 125 fljB_handle=open(fljB_XML) | |
| 126 records=NCBIXML.parse(fljB_handle) | |
| 127 fljB_records=list(records) | |
| 128 E_thresh=1e-10 | |
| 129 hspbit=[] | |
| 130 alignmentlist=[] | |
| 131 for record in fljB_records: | |
| 132 for alignment in record.alignments: | |
| 133 hsp_bit_score=0 | |
| 134 startlist=[]# | |
| 135 endlist=[]# | |
| 136 for hsp in alignment.hsps: | |
| 137 start=hsp.query_start# | |
| 138 end=hsp.query_end# | |
| 139 leng=abs(start-end)# | |
| 140 if hsp.expect<E_thresh:# | |
| 141 if start>end:# | |
| 142 temp=start# | |
| 143 start=end# | |
| 144 end=start# | |
| 145 if len(startlist)==0:# | |
| 146 hsp_bit_score=hsp_bit_score+hsp.bits# | |
| 147 startlist.append(start)# | |
| 148 endlist.append(end)# | |
| 149 else:# | |
| 150 for i in range(len(startlist)):# | |
| 151 if startlist[i]<start<endlist[i]:# | |
| 152 start=endlist[i]+1# | |
| 153 if startlist[i]<end<endlist[i]:#03112016 | |
| 154 end=startlist[i]-1# | |
| 155 if end<start:#the new hsp was included in old hsp# | |
| 156 percentage=0# | |
| 157 else: | |
| 158 percentage=float(end-start)/leng# | |
| 159 startlist.append(start)# | |
| 160 endlist.append(end)# | |
| 161 hsp_bit_score=hsp_bit_score+percentage*hsp.bits# | |
| 162 alignment=alignment.hit_def+':'+str(hsp_bit_score) | |
| 163 hspbit.append(hsp_bit_score) | |
| 164 alignmentlist.append(alignment) | |
| 165 fljB_scorelist=dict(zip(alignmentlist,hspbit)) | |
| 166 | |
| 167 fljB_score=0 | |
| 168 fljB_serotype=[] | |
| 169 fljB_seroscore=[] | |
| 170 for Htype in fljB_scorelist: | |
| 171 if fljB_scorelist[Htype]>fljB_score: | |
| 172 fljB_First_Choice=Htype | |
| 173 fljB_score=fljB_scorelist[Htype] | |
| 174 if locals().has_key('fljB_First_Choice'): | |
| 175 fljB_serotype.append(fljB_First_Choice.split("__")[0]) | |
| 176 fljB_seroscore.append(fljB_score) | |
| 177 fljB_secscore=0 | |
| 178 for Htype in fljB_scorelist: | |
| 179 if fljB_scorelist[Htype]>fljB_secscore and (Htype.split("__")[0] not in fljB_serotype): | |
| 180 fljB_Sec_Choice=Htype | |
| 181 fljB_secscore=fljB_scorelist[Htype] | |
| 182 if locals().has_key('fljB_Sec_Choice'): | |
| 183 fljB_serotype.append(fljB_Sec_Choice.split("__")[0]) | |
| 184 fljB_seroscore.append(fljB_secscore) | |
| 185 fljB_thirdscore=0 | |
| 186 for Htype in fljB_scorelist: | |
| 187 if fljB_scorelist[Htype]>fljB_thirdscore and (Htype.split("__")[0] not in fljB_serotype): | |
| 188 fljB_Third_Choice=Htype | |
| 189 fljB_thirdscore=fljB_scorelist[Htype] | |
| 190 if locals().has_key('fljB_Third_Choice'): | |
| 191 fljB_serotype.append(fljB_Third_Choice.split("__")[0]) | |
| 192 fljB_seroscore.append(fljB_thirdscore) | |
| 193 | |
| 194 if fljB_score>100: | |
| 195 print '#',target,'$$$ Most possible H_fljB_type: ',fljB_First_Choice,'\n' | |
| 196 print '$$$ Most bit_score:',fljB_score,'\n' | |
| 197 if locals().has_key('fljB_secscore'): | |
| 198 if fljB_secscore>100: | |
| 199 print '#',target,'$$$ Second possible H_fljB_type: ',fljB_Sec_Choice,'\n' | |
| 200 print '$$$ Second bit_score:',fljB_secscore,'\n' | |
| 201 if locals().has_key('fljB_thirdscore'): | |
| 202 if fljB_thirdscore>100: | |
| 203 print '#',target,'$$$ Third possible H_fljB_type: ',fljB_Third_Choice,'\n' | |
| 204 print '$$$ Third bit_score:',fljB_thirdscore,'\n' | |
| 205 else: | |
| 206 print '$$$ No fljB in',target | |
| 207 else: | |
| 208 fljB_score=1 | |
| 209 print '$$$ No fljB (no file created) in',target | |
| 210 | |
| 211 | |
| 212 if score>100 and fljB_score>100: | |
| 213 fliC_sero=dict(zip(serotype,seroscore)) | |
| 214 fljB_sero=dict(zip(fljB_serotype,fljB_seroscore)) | |
| 215 combination=[] | |
| 216 combination_score=[] | |
| 217 for seroname in fliC_sero: | |
| 218 for fljB_seroname in fljB_sero: | |
| 219 for i in range(len(phase1)): | |
| 220 if phase1[i]==seroname and phase2[i]==fljB_seroname: | |
| 221 name=seroname+"_"+fljB_seroname | |
| 222 score=fliC_sero[seroname]+fljB_sero[fljB_seroname] | |
| 223 combination.append(name) | |
| 224 combination_score.append(score) | |
| 225 combinationlist=dict(zip(combination,combination_score)) #we can do the filteration here | |
| 226 final_dict=sorted(combinationlist.iteritems(), key=lambda d:d[1], reverse = True) | |
| 227 print "$$_H:Order:",final_dict | |
| 228 elif score>100 and fljB_score<100: | |
| 229 print "$$_H:No fljB, only fliC, and its order:",First_Choice,Sec_Choice,Third_Choice | |
| 230 elif score<100 and fljB_score>100: | |
| 231 print "$$_H:No fliC, only fljB, and its order:",fljB_First_Choice,fljB_Sec_Choice,fljB_Third_Choice | |
| 232 elif score==1 and fljB_score>100: | |
| 233 print "$$_H:No fliC (file) existed, only fljB, and its order:",fljB_First_Choice,fljB_Sec_Choice,fljB_Third_Choice | |
| 234 elif score==1 and fljB_score<100: | |
| 235 print "$$_H:No fliC (file) existed, and no fljB" | |
| 236 elif score>100 and fljB_score==1: | |
| 237 print "$$_H:No fljB (file) existed, only fliC, and its order:",First_Choice,Sec_Choice,Third_Choice | |
| 238 elif score<100 and fljB_score==1: | |
| 239 print "$$_H:No fljB (file) existed, and no fliC" | |
| 240 else: | |
| 241 print "$$_H:No fliC and fljB" | |
| 242 | |
| 243 | |
| 244 ''' | |
| 245 E_thresh=1e-10 | |
| 246 hspbit=[] | |
| 247 alignmentlist=[] | |
| 248 for record in fliC_records: | |
| 249 for alignment in record.alignments: | |
| 250 hsp_bit_score=0 | |
| 251 for hsp in alignment.hsps: | |
| 252 if hsp.expect<E_thresh: | |
| 253 hsp_bit_score=hsp_bit_score+hsp.bits | |
| 254 alignment=alignment.hit_def+':'+str(hsp_bit_score) | |
| 255 hspbit.append(hsp_bit_score) | |
| 256 alignmentlist.append(alignment) | |
| 257 | |
| 258 scorelist=dict(zip(alignmentlist,hspbit)) | |
| 259 score=0 | |
| 260 for Htype in scorelist: | |
| 261 if scorelist[Htype]>score: | |
| 262 First_Choice=Htype | |
| 263 score=scorelist[Htype] | |
| 264 if score>100: | |
| 265 print '#',target,'Most possible H_fliC_type: ',First_Choice,'\n' | |
| 266 print '#bit_score:',score,'\n' | |
| 267 else: | |
| 268 print '#No fliC in',target | |
| 269 | |
| 270 | |
| 271 E_thresh=1e-10 | |
| 272 hspbit=[] | |
| 273 alignmentlist=[] | |
| 274 for record in fljB_records: | |
| 275 for alignment in record.alignments: | |
| 276 hsp_bit_score=0 | |
| 277 for hsp in alignment.hsps: | |
| 278 if hsp.expect<E_thresh: | |
| 279 hsp_bit_score=hsp_bit_score+hsp.bits | |
| 280 alignment=alignment.hit_def+':'+str(hsp_bit_score) | |
| 281 hspbit.append(hsp_bit_score) | |
| 282 alignmentlist.append(alignment) | |
| 283 | |
| 284 scorelist=dict(zip(alignmentlist,hspbit)) | |
| 285 fljB_score=0 | |
| 286 for Htype in scorelist: | |
| 287 if scorelist[Htype]>fljB_score: | |
| 288 First_Choice=Htype | |
| 289 fljB_score=scorelist[Htype] | |
| 290 if fljB_score>100: | |
| 291 print '#',target,'Most possible H_fljB_type: ',First_Choice,'\n' | |
| 292 print '#bit_score:',fljB_score,'\n' | |
| 293 else: | |
| 294 print '#No fljB in',target | |
| 295 ''' | |
| 296 |
